# EY AI challenge 2025 - CV analyzer bot

## Imports

In [53]:
import pandas as pd
from PyPDF2 import PdfReader
import os
import re

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import json
import time

## Define model

In [None]:
os.environ["GOOGLE_API_KEY"] = "censored"


In [55]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [56]:
# Define prompt

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that evaluates CVs based on a job description.\n"
            "Rate the candidate on the following:\n"
            "- Experience: 0 to 60\n"
            "- Education: 0 to 30\n"
            "- Location Fit: 0 to 10 (considering porto-lisbon is a 5, lisbon-lisbon or porto-porto would be a 10, lisbon-bangkok would be a 0)\n"
            "- Overall: sum of the above (0 to 100)\n\n"
            "Respond ONLY in JSON format as:\n"
            "{{\n"
            "  \"experience\": <int>,\n"
            "  \"education\": <int>,\n"
            "  \"location\": <int>,\n"
            "  \"overall\": <int>\n"
            "}}"
        ),
        ("human", "Job description:\n{job_description}\n\nCV:\n{cv}"),
    ]
)

# Define chain
chain = prompt | llm | StrOutputParser()

## Read dataset

In [57]:

cv_folder = "../CVs/"
cv_files = [f for f in os.listdir(cv_folder) if f.endswith(".pdf")]

cvs = []

for file in cv_files:
    path = os.path.join(cv_folder, file)
    reader = PdfReader(path)
    text = "".join([page.extract_text() or "" for page in reader.pages])
    cvs.append({
        "name": os.path.splitext(file)[0],  # e.g., "cv_1"
        "cv": text.strip()
    })

In [58]:
cvs[0]

{'name': 'cv_47',
 'cv': 'cv_47.md 2025-05-02\n1 / 2Helena Santos\nTitle:  Healthcare Consultant\nLocation:  Porto, P ortugal\nEmail:  helena.santos@example.com\nPhone:  +351 912 345 678\nSummary\nHealthcare professional with background in hospital administration and 5 years of experience in healthcare\noperations improvement. Expertise in process optimization, quality management, and healthcare analytics.\nSeeking to leverage industry knowledge in a Big4 healthcare consulting practice.\nExperience\nHospital de São Jo ão – P orto\nOperations Manager\nMay 2021 – Present\nLed operational excellence initiatives resulting in 15% reduction in patient waiting times.\nImplemented quality management systems aligned with international healthcare standards.\nManaged cross-functional teams to improve resource allocation and department coordination.\nCUF Hospitals – Lisbon\nHealthcare Analyst\nFebruary 2019 – April 2021\nConducted performance analyses of clinical departments to identify improvemen

In [59]:

job_folder = "../JobDescriptions/"
job_files = [f for f in os.listdir(job_folder) if f.endswith(".pdf")]

jobs = []

for file in job_files:

    path = os.path.join(job_folder, file)
    reader = PdfReader(path)
    text = "".join([page.extract_text() or "" for page in reader.pages])
    jobs.append({
        "name": os.path.splitext(file)[0],  # e.g., "cv_1"
        "description": text.strip()
    })

In [60]:
jobs[:2]

[{'name': 'JobDescription4',
  'description': "Financial Analyst  \nLocation: Lisbon, Portugal  \nDepartment: Financial Services  \nExperience Level: Staff  \n \nAbout the Role:  \nJoin our Financial Services team as a Financial Analyst. You will support \nfinancial planning and analysis activities, providing insights to drive \nbusiness performance.  \n \nKey Responsibilities:  \n \nAssist in the preparation of financial reports and forecasts.  \nAnalyze financial data to identify trends and variances.  \nCollaborate with business units to support decision -making. \nEnsure compliance with financial regulations and standards.  \nSupport ad -hoc financial analysis and projects.  \nRequirements:  \n \nBachelor's degree in Finance, Accounting, or related field.  \n0-2 years of experience in financial analysis or related roles.  \nStrong analytical and quantitative skills.  \nProficiency in Excel and financial modeling.  \nFluency in English and Portuguese required."},
 {'name': 'JobDescr

## Create data

In [61]:
results = []

for job in jobs:
    for candidate in cvs:
        print("evaluating candidate", candidate["name"], "for job", job["name"])
        response = chain.invoke({"job_description": job, "cv": candidate["cv"]})

        # Remove ```json ... ``` wrapper
        clean_response = re.sub(r"^```(?:json)?\s*|```$", "", response.strip(), flags=re.IGNORECASE).strip()

        try:
            parsed = json.loads(clean_response)
        except json.JSONDecodeError:
            parsed = {"experience": None, "education": None, "location": None, "overall": None}

        results.append({
            "job_description": job["name"],
            "candidate_name": candidate["name"],
            "experience": parsed["experience"],
            "education": parsed["education"],
            "location": parsed["location"],
            "overall": parsed["overall"],
        })

        if len(results) > 15:
            break
    if len(results) > 15:
            break

# Create DataFrame
df = pd.DataFrame(results)

evaluating candidate cv_47 for job JobDescription4
evaluating candidate cv_103 for job JobDescription4
evaluating candidate cv_87 for job JobDescription4
evaluating candidate cv_69 for job JobDescription4
evaluating candidate cv_94 for job JobDescription4
evaluating candidate cv_79 for job JobDescription4
evaluating candidate cv_38 for job JobDescription4
evaluating candidate cv_90 for job JobDescription4
evaluating candidate cv_105 for job JobDescription4
evaluating candidate cv_35 for job JobDescription4
evaluating candidate cv_18 for job JobDescription4
evaluating candidate cv_63 for job JobDescription4
evaluating candidate cv_49 for job JobDescription4
evaluating candidate cv_5 for job JobDescription4
evaluating candidate cv_74 for job JobDescription4
evaluating candidate cv_72 for job JobDescription4


In [62]:
df.sort_values(ascending=False, by="overall")

Unnamed: 0,job_description,candidate_name,experience,education,location,overall
15,JobDescription4,cv_72,50,30,10,90
1,JobDescription4,cv_103,50,30,5,85
3,JobDescription4,cv_69,45,30,10,85
4,JobDescription4,cv_94,45,30,10,85
13,JobDescription4,cv_5,45,30,10,85
12,JobDescription4,cv_49,45,30,7,82
2,JobDescription4,cv_87,45,25,10,80
14,JobDescription4,cv_74,45,30,5,80
8,JobDescription4,cv_105,45,25,5,75
9,JobDescription4,cv_35,40,30,5,75
