In [6]:
import os
import pdfplumber
import fpdf
import json
import openai
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
API_KEY=os.getenv("API_KEY")
openAI_API_KEY=os.getenv("openAI_API_KEY")

## Parsing through openAI

In [None]:
import openai
import pdfplumber
import json

# STEP 1: Load PDF and extract text
def extract_text_from_pdf(file_path):
    with pdfplumber.open(file_path) as pdf:
        return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])

# STEP 2: Define JSON schema and prompt
def build_prompt(resume_text):
    json_schema = {
        "name": "",
        "email": "",
        "phone": "",
        "country": "",
        "city": "",
        "summary": "",
        "skills": [
            {
                'specialized skill': "",
                'common skill': ""
                }
            ],
        "experience": [
            {
                "job_title": "",
                "company": "",
                "start_date": "",
                "end_date": "",
                "description": ""
            }
        ],
        "education": [
            {
                "degree": "",
                "institution": "",
                "start_year": "",
                "end_year": ""
            }
        ],
        "enrichment parameters": [
            {
                "Employment Pattern & Progression": "",
                "Company Type & Sector": "",
                "Education Quality & Ranking": "",
                "Skill Demand & Market Relevance": "",
                "Leadership Experience": "",
                "Budget & Project Management": "",
                "International Experience & Mobility": "",
                "Soft Skills from Sales Calls": "",
                "Personality & Behavioral Traits": "",
                "Future Career Goals (Sales-Inferred)": "",
                "Salary Expectations (Sales-Inferred)": "",
                "JD Enrichment with Implied Preferences": "",
                "Cultural Fit Indicators": ""
            }
        ]
    }

    prompt = f"""
You are an expert resume parser. Convert the resume text below into this JSON format. Fill in all the relevant fields. Leave the enrichment_parameters field empty.
The JSON schema is as follows:

{json.dumps(json_schema, indent=2)}

Resume:
\"\"\"
{resume_text}
\"\"\"
"""
    return prompt

# STEP 3: Call OpenAI API
def call_openai(prompt):
    api_key = openAI_API_KEY
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    #return response['choices'][0]['message']['content']
    return response.choices[0].message.content

# STEP 4: Main function
def main():
    pdf_path = "./sample_CVs/John_Doe_CV.pdf"  # Change if needed
    resume_text = extract_text_from_pdf(pdf_path)
    prompt = build_prompt(resume_text)
    parsed_cv = call_openai(prompt)

    print("Parsed CV:")
    print(parsed_cv)

    #Optionally save to file
    with open("./gpt_parsed_CVs/John_Doe_parsed_cv.json", "w") as f:
        f.write(parsed_cv)

if __name__ == "__main__":
    main()


Parsed CV:
{
  "name": "John Doe",
  "email": "johndoe@email.com",
  "phone": "+1-234-567-8901",
  "country": "",
  "city": "",
  "summary": "Seasoned finance executive with 18+ years of experience in financial strategy, risk management, and capital raising. Expertise in scaling startups and optimizing financial operations to drive profitability and growth.",
  "skills": [
    {
      "specialized skill": "Financial Strategy & Planning, Risk Management, Mergers & Acquisitions, Venture Capital & Fundraising, Budgeting & Forecasting",
      "common skill": ""
    }
  ],
  "experience": [
    {
      "job_title": "Chief Financial Officer",
      "company": "XYZ Tech Solutions",
      "start_date": "2018",
      "end_date": "Present",
      "description": "Spearheaded fundraising efforts, securing $50M in venture capital funding. Optimized financial structures, reducing operational costs by 30%. Led M&A initiatives, successfully acquiring and integrating 3 companies."
    },
    {
      "j

## DEEPSEEK

In [14]:
from openai import OpenAI
import json

# filepath: /Users/prayagsharma/Documents/ACP/scripts/deepseek_enrichment.py
def enrich_cv_with_deepseek(api_key, cv_data):
    """
    Enriches the CV data using the DeepSeek reasoning model to fill in empty enrichment parameters.

    Args:
        api_key (str): The API key for the DeepSeek API.
        cv_data (dict): The CV data to be enriched.

    Returns:
        dict: The enriched CV data with filled enrichment parameters.
    """
    # Initialize the OpenAI client with the DeepSeek API key
    client = OpenAI(api_key=api_key, base_url="https://api.deepseek.com")

    # Round 1: Send the initial prompt
    messages = [
        {"role": "user", "content": """
        Analyze the provided CV data and infer the following enrichment parameters:
        - Employment Pattern & Progression: Describe the career trajectory and progression.
        - Company Type & Sector: Identify the type and sector of companies worked for.
        - Education Quality & Ranking: Assess the quality and ranking of educational institutions.
        - Skill Demand & Market Relevance: Evaluate the relevance of skills in the current market.
        - Leadership Experience: Highlight leadership roles and responsibilities.
        - Budget & Project Management: Detail experience in managing budgets and projects.
        - International Experience & Mobility: Indicate international exposure and mobility.
        - Soft Skills from Sales Calls: Infer soft skills demonstrated in sales or communication.
        - Personality & Behavioral Traits: Deduce personality traits and behaviors.
        - Future Career Goals (Sales-Inferred): Predict future career aspirations based on sales roles.
        - Salary Expectations (Sales-Inferred): Estimate salary expectations based on experience.
        - JD Enrichment with Implied Preferences: Enrich job descriptions with implied preferences.
        - Cultural Fit Indicators: Suggest cultural fit indicators for potential roles.
        """}
    ]
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=messages
    )

    # Extract the assistant's response
    assistant_response = response.choices[0].message.content
    messages.append({"role": "assistant", "content": assistant_response})

    # Round 2: Send the CV data
    messages.append({"role": "user", "content": f"Here is the CV data: {json.dumps(cv_data)}. Please analyze and fill in the enrichment parameters. return the enriched CV data in JSON format."})
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=messages,
        response_format={
        'type': 'json_object'
    }
    )

    # Extract the enriched CV data from the assistant's response
    enriched_cv_content = response.choices[0].message.content
    enriched_cv = json.loads(enriched_cv_content)  # Assuming the response contains the enriched CV data in JSON format
    return enriched_cv


# Example usage
if __name__ == "__main__":
    # Load the CV data from cv_0.json
    with open("/Users/prayagsharma/Documents/ACP/Affinda_parsed_CVs/cv_0.json", "r") as file:
        cv_data = json.load(file)

    # DeepSeek API key
    api_key = API_KEY

    # Enrich the CV data
    try:
        enriched_cv = enrich_cv_with_deepseek(api_key, cv_data)
        # Merge the enriched CV data into the original CV data
        cv_data['enrichment parameters'] = enriched_cv['enrichment parameters']
        # Save the updated cv_data to a new file
        with open("/Users/prayagsharma/Documents/ACP/gpt_enriched_CVs/cv_0_updated.json", "w") as newfile:
            json.dump(cv_data, newfile, indent=4)
        # Save the enriched CV data back to the file
        with open("/Users/prayagsharma/Documents/ACP/gpt_enriched_CVs/cv_0_enriched_deepseek.json", "w") as file:
            json.dump(enriched_cv, file, indent=4)
        print("CV enrichment completed successfully.")
    except Exception as e:
        print(f"Error during CV enrichment: {e}")

CV enrichment completed successfully.


## OPENAI

In [15]:
from openai import OpenAI
import json

# filepath: /Users/prayagsharma/Documents/ACP/scripts/openai_enrichment.py
def enrich_cv_with_openai(api_key, cv_data):
    """
    Enriches the CV data using the OpenAI API to fill in empty enrichment parameters.

    Args:
        api_key (str): The API key for the OpenAI API.
        cv_data (dict): The CV data to be enriched.

    Returns:
        dict: The enriched CV data with filled enrichment parameters.
    """
    # Initialize the OpenAI client
    client = OpenAI(api_key=api_key)

    # Round 1: Send the initial prompt
    messages = [
        {"role": "user", "content": """
        Analyze the provided CV data and infer the following enrichment parameters:
        - Employment Pattern & Progression: Describe the career trajectory and progression.
        - Company Type & Sector: Identify the type and sector of companies worked for.
        - Education Quality & Ranking: Assess the quality and ranking of educational institutions.
        - Skill Demand & Market Relevance: Evaluate the relevance of skills in the current market.
        - Leadership Experience: Highlight leadership roles and responsibilities.
        - Budget & Project Management: Detail experience in managing budgets and projects.
        - International Experience & Mobility: Indicate international exposure and mobility.
        - Soft Skills from Sales Calls: Infer soft skills demonstrated in sales or communication.
        - Personality & Behavioral Traits: Deduce personality traits and behaviors.
        - Future Career Goals (Sales-Inferred): Predict future career aspirations based on sales roles.
        - Salary Expectations (Sales-Inferred): Estimate salary expectations based on experience.
        - JD Enrichment with Implied Preferences: Enrich job descriptions with implied preferences.
        - Cultural Fit Indicators: Suggest cultural fit indicators for potential roles.
        """}
    ]
    response = client.chat.completions.create(
        model="o3-mini-2025-01-31",
        messages=messages
    )

    # Extract the assistant's response
    assistant_response = response.choices[0].message.content
    messages.append({"role": "assistant", "content": assistant_response})

    # Round 2: Send the CV data
    messages.append({"role": "user", "content": f"Here is the CV data: {json.dumps(cv_data)}. Please analyze and fill in the enrichment parameters. Return the enriched CV data in JSON format."})
    response = client.chat.completions.create(
        model="o3-mini-2025-01-31",
        messages=messages,
        response_format={"type": "json_object"}
    )

    # Extract the enriched CV data from the assistant's response
    enriched_cv_content = response.choices[0].message.content
    enriched_cv = json.loads(enriched_cv_content)  # Assuming the response contains the enriched CV data in JSON format
    return enriched_cv


# Example usage
if __name__ == "__main__":
    # Load the CV data from cv_0.json
    with open("/Users/prayagsharma/Documents/ACP/Affinda_parsed_CVs/cv_0.json", "r") as file:
        cv_data = json.load(file)

    # OpenAI API key
    api_key = openAI_API_KEY

    # Enrich the CV data
    try:
        enriched_cv = enrich_cv_with_openai(api_key, cv_data)
        # Save the enriched CV data back to the file
        with open("/Users/prayagsharma/Documents/ACP/gpt_enriched_CVs/cv_0_enriched_openAI.json", "w") as file:
            json.dump(enriched_cv, file, indent=4)
        print("CV enrichment completed successfully.")
    except Exception as e:
        print(f"Error during CV enrichment: {e}")

CV enrichment completed successfully.


### Parse all CVS through openAI

In [9]:

# STEP 1: Load PDF and extract text
def extract_text_from_pdf(file_path):
    with pdfplumber.open(file_path) as pdf:
        return "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])

# STEP 2: Define JSON schema and prompt
def build_prompt(resume_text):
    json_schema = {
        "name": "",
        "email": "",
        "phone": "",
        "country": "",
        "city": "",
        "summary": "",
        "skills": [
            {
                'specialized skill': "",
                'common skill': ""
            }
        ],
        "experience": [
            {
                "job_title": "",
                "company": "",
                "start_date": "",
                "end_date": "",
                "description": ""
            }
        ],
        "education": [
            {
                "degree": "",
                "institution": "",
                "start_year": "",
                "end_year": ""
            }
        ],
        "enrichment parameters": [
            {
                "Employment Pattern & Progression": "",
                "Company Type & Sector": "",
                "Education Quality & Ranking": "",
                "Skill Demand & Market Relevance": "",
                "Leadership Experience": "",
                "Budget & Project Management": "",
                "International Experience & Mobility": "",
                "Soft Skills from Sales Calls": "",
                "Personality & Behavioral Traits": "",
                "Future Career Goals (Sales-Inferred)": "",
                "Salary Expectations (Sales-Inferred)": "",
                "JD Enrichment with Implied Preferences": "",
                "Cultural Fit Indicators": ""
            }
        ]
    }

    prompt = f"""
You are an expert resume parser. Convert the resume text below into this JSON format. Fill in all the relevant fields. Leave the enrichment_parameters field empty.
The JSON schema is as follows:

{json.dumps(json_schema, indent=2)}

Resume:
\"\"\"
{resume_text}
\"\"\"
"""
    return prompt

# STEP 3: Call OpenAI API
def call_openai(prompt):
    openai.api_key = openAI_API_KEY
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    return response.choices[0].message.content

# STEP 4: Main function to process all PDFs
def main():
    input_folder = "./sample_CVs"
    output_folder = "./gpt_parsed_CVs"

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(".pdf"):  # Ignore non-PDF files
            pdf_path = os.path.join(input_folder, file_name)
            print(f"Processing: {file_name}")

            # Extract text from PDF
            resume_text = extract_text_from_pdf(pdf_path)

            # Build prompt and call OpenAI
            prompt = build_prompt(resume_text)
            parsed_cv = call_openai(prompt)

            # Save the parsed CV to the output folder
            output_file = os.path.join(output_folder, f"{os.path.splitext(file_name)[0]}_parsed.json")
            with open(output_file, "w") as f:
                f.write(parsed_cv)

            print(f"Saved parsed CV to: {output_file}")

if __name__ == "__main__":
    main()

Processing: Lisa_Green.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Lisa_Green_parsed.json
Processing: Mark_Reynolds.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Mark_Reynolds_parsed.json
Processing: John_Doe_CV.pdf
Saved parsed CV to: ./gpt_parsed_CVs/John_Doe_CV_parsed.json
Processing: Kevin_Adams.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Kevin_Adams_parsed.json
Processing: Michael_Johnson_CV.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Michael_Johnson_CV_parsed.json
Processing: Sophia_Martinez.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Sophia_Martinez_parsed.json
Processing: Sarah_Lee_CV.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Sarah_Lee_CV_parsed.json
Processing: Daniel_Carter.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Daniel_Carter_parsed.json
Processing: Jane_Smith_CV.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Jane_Smith_CV_parsed.json
Processing: Robert_Brown_CV.pdf
Saved parsed CV to: ./gpt_parsed_CVs/Robert_Brown_CV_parsed.json


## Fill enriched Parameters

In [14]:


# STEP 1: Build enrichment prompt
def build_enrichment_prompt(cv_data):
    prompt = f"""
You are an expert in CV enrichment. Analyze the provided CV data and infer the following enrichment parameters:
- Employment Pattern & Progression: Describe the career trajectory and progression.
- Company Type & Sector: Identify the type and sector of companies worked for.
- Education Quality & Ranking: Assess the quality and ranking of educational institutions.
- Skill Demand & Market Relevance: Evaluate the relevance of skills in the current market.
- Leadership Experience: Highlight leadership roles and responsibilities.
- Budget & Project Management: Detail experience in managing budgets and projects.
- International Experience & Mobility: Indicate international exposure and mobility.
- Soft Skills from Sales Calls: Infer soft skills demonstrated in sales or communication.
- Personality & Behavioral Traits: Deduce personality traits and behaviors.
- Future Career Goals (Sales-Inferred): Predict future career aspirations based on sales roles.
- Salary Expectations (Sales-Inferred): Estimate salary expectations based on experience.
- JD Enrichment with Implied Preferences: Enrich job descriptions with implied preferences.
- Cultural Fit Indicators: Suggest cultural fit indicators for potential roles.

Here is the CV data:
{json.dumps(cv_data, indent=2)}

Please analyze and fill in the enrichment parameters. Return the enriched CV data in JSON format.
"""
    return prompt

# STEP 2: Call OpenAI API for enrichment
def call_openai_for_enrichment(prompt):
    openai.api_key = openAI_API_KEY
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    enriched_cv_content = response.choices[0].message.content
    return json.loads(enriched_cv_content)  # Assuming the response is valid JSON

# STEP 3: Main function to process all parsed CVs
def main():
    input_folder = "./gpt_parsed_CVs"
    output_folder = "./gpt_enriched_CVs"

    # Ensure output folder exists
    os.makedirs(output_folder, exist_ok=True)

    # Loop through all files in the input folder
    for file_name in os.listdir(input_folder):
        if file_name.endswith(".json") and not file_name.startswith(("Daniel", "Jane", "John", "Kevin", "Lisa", "Michael", "Robert", "Sarah")):  # Process only JSON files not starting with specified names
                    input_file_path = os.path.join(input_folder, file_name)
                    print(f"Processing: {file_name}")

                    # Load the parsed CV data
                    with open(input_file_path, "r") as f:
                        cv_data = json.load(f)

                    # Build enrichment prompt and call OpenAI
                    prompt = build_enrichment_prompt(cv_data)
                    enriched_cv = call_openai_for_enrichment(prompt)

                    # Merge enrichment parameters into the original CV data
                    cv_data["enrichment parameters"] = enriched_cv.get("enrichment parameters", {})

                    # Save the enriched CV to the output folder
                    output_file_path = os.path.join(output_folder, file_name)
                    with open(output_file_path, "w") as f:
                        json.dump(cv_data, f, indent=4)

                    print(f"Saved enriched CV to: {output_file_path}")

if __name__ == "__main__":
    main()

Processing: Sophia_Martinez_parsed.json
Saved enriched CV to: ./gpt_enriched_CVs/Sophia_Martinez_parsed.json
Processing: Mark_Reynolds_parsed.json
Saved enriched CV to: ./gpt_enriched_CVs/Mark_Reynolds_parsed.json


## Flagging and questionnaire test

In [9]:
from fpdf import FPDF

# Load the JSON files
def load_json(file_path):
    with open(file_path, "r") as file:
        return json.load(file)

# Function to call OpenAI API
def call_openai(prompt):
    client= OpenAI(api_key=openAI_API_KEY)
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    return response.choices[0].message.content

# Function to extract missing points from the response
def extract_missing_points(response):
    # Split the response into lines
    lines = response.split("\n")
    missing_points = []

    # Look for the section with "Key Missing Information for CLO Role"
    start_extracting = False
    for line in lines:
        if "Key Missing Information for CLO Role" in line:
            start_extracting = True
            continue
        if start_extracting:
            # Extract points (assuming they are listed with a dash or number)
            if line.strip().startswith("-") or line.strip().startswith("•"):
                missing_points.append(line.strip("- ").strip())
            elif line.strip() == "":
                break  # Stop if there's an empty line after the points

    return missing_points

# Main function
def main():
    # Load the relevant JSON files
    candidate_cv = load_json("/Users/prayagsharma/Documents/ACP/gpt_enriched_CVs/Daniel_Carter_parsed.json")
    job_description = load_json("/Users/prayagsharma/Documents/ACP/JD_enriched/job_description_4_parsed.json")

    # Combine the JSON data into a single context
    context = f"""
    Candidate CV:
    {json.dumps(candidate_cv, indent=2)}

    Job Description:
    {json.dumps(job_description, indent=2)}
    """

    # Step 1: Ask questions 1, 2, and 3
    prompt_1_3 = f"""
    {context}

    Please answer the following questions:
    1. Could you please give me an overview of this candidate's CV?
    2. Could you expand on the missing information that you pointed out? Please explain why they should be important. (These can also be used as a base for questions, but they can be subjective to the interviewer, e.g., school grades or employment pattern.)
    3. This candidate is applying for the role of "Chief Legal Officer." Given the role, what key information is missing from his CV? Sum it up in 5 points.
    """
    response_1_3 = call_openai(prompt_1_3)
    print("Response to Questions 1, 2, and 3:")
    print(response_1_3)

    # Step 2: Extract the 5 missing points from the response
    print("\nExtracting missing points for follow-up questions...")
   # Parse the response to extract the missing points

    # Extract missing points from the response
    missing_points = extract_missing_points(response_1_3)

    flagging_output= "./flagging_output"
    # Ensure the "flagging_output" folder exists
    os.makedirs(flagging_output, exist_ok=True)
    # Save response_1_3 and missing_points as pdf
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # Add content to the PDF
    pdf.multi_cell(0, 10, response_1_3)
    pdf.ln(10)  # Add a line break
    pdf.set_font("Arial", style="B", size=12)
    pdf.cell(0, 10, "Missing Points:", ln=True)
    pdf.set_font("Arial", size=12)
    for point in missing_points:
        pdf.multi_cell(0, 10, f"- {point}")
    # Save the PDF to the "flagging_output" folder
    output_file = os.path.join(flagging_output, "flagging_output.pdf")
    pdf.output(output_file)
    print(f"Flagging output saved as PDF to: {output_file}")
    print("Missing Points:")
    for point in missing_points:
        print(f"- {point}")
    
    # Step 3: Ask question 4 based on the missing points
    prompt_4 = f"""
    Based on the missing points identified earlier, please draw up a 5-question questionnaire to be asked by our Sales Team during an interview with the candidate. These questions should be formulated in a way that can give the candidate the possibility to explain why that information is missing. One question for each of the points mentioned below:

    {json.dumps(missing_points, indent=2)}
    """
    response_4 = call_openai(prompt_4)
    # Ensure the "questionnaire" folder exists
    output_folder = "./questionnaire"
    os.makedirs(output_folder, exist_ok=True)

    # Save the response as a PDF

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    # Add content to the PDF
    pdf.multi_cell(0, 10, response_4)

    # Save the PDF to the "questionnaire" folder
    output_file = os.path.join(output_folder, "questionnaire.pdf")
    pdf.output(output_file)

    print(f"Questionnaire saved as PDF to: {output_file}")
    print("Response to Question 4:")
    print(response_4)

if __name__ == "__main__":
    main()

Response to Questions 1, 2, and 3:
1. The candidate, Daniel Carter, is a seasoned legal executive with over 20 years of experience in corporate governance, compliance, and risk management. He has specialized skills in corporate governance, compliance & risk management, mergers & acquisitions, contract negotiation, and intellectual property law. His professional experience includes roles as General Counsel at LegalPath and Chief Legal Officer at SecureLaw Inc. He has a Juris Doctor (JD) degree from Yale Law School and a B.A. in Political Science from Princeton University.

2. The missing information includes the candidate's country and city of residence, the start and end years of his education, and common skills. The country and city of residence are important to determine if the candidate is eligible or willing to relocate for the job. The start and end years of his education can provide insights into his age, which might be relevant for some roles. Common skills, while not as special