# EY AI challenge 2025 - CV analyzer bot

## Imports

In [1]:
import pandas as pd
from PyPDF2 import PdfReader
import getpass
import os
import re
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_core.output_parsers import StrOutputParser

## Define model

In [2]:
os.environ["GOOGLE_API_KEY"] = "AIzaSyDs1Odr7ypZkFb0qI1O8WM-GlztzkEmhXg"


In [3]:
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [4]:
# Define prompt
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant that evaluates CVs and gives a score from 1 to 100 based on how suitable a candidate is for a given job description.",
        ),
        ("human", "Job description:\n{job_description}\n\nCV:\n{cv}"),
    ]
)

# Define chain
chain = prompt | llm | StrOutputParser()

## Read dataset

In [5]:

cv_folder = "../CVs/"
cv_files = [f for f in os.listdir(cv_folder) if f.endswith(".pdf")]

cvs = []

for file in cv_files:
    path = os.path.join(cv_folder, file)
    reader = PdfReader(path)
    text = "".join([page.extract_text() or "" for page in reader.pages])
    cvs.append({
        "name": os.path.splitext(file)[0],  # e.g., "cv_1"
        "cv": text.strip()
    })

In [6]:
cvs[0]

{'name': 'cv_47',
 'cv': 'cv_47.md 2025-05-02\n1 / 2Helena Santos\nTitle:  Healthcare Consultant\nLocation:  Porto, P ortugal\nEmail:  helena.santos@example.com\nPhone:  +351 912 345 678\nSummary\nHealthcare professional with background in hospital administration and 5 years of experience in healthcare\noperations improvement. Expertise in process optimization, quality management, and healthcare analytics.\nSeeking to leverage industry knowledge in a Big4 healthcare consulting practice.\nExperience\nHospital de São Jo ão – P orto\nOperations Manager\nMay 2021 – Present\nLed operational excellence initiatives resulting in 15% reduction in patient waiting times.\nImplemented quality management systems aligned with international healthcare standards.\nManaged cross-functional teams to improve resource allocation and department coordination.\nCUF Hospitals – Lisbon\nHealthcare Analyst\nFebruary 2019 – April 2021\nConducted performance analyses of clinical departments to identify improvemen

In [8]:

job_folder = "../JobDescriptions/"
job_files = [f for f in os.listdir(job_folder) if f.endswith(".pdf")]

jobs = []

for file in job_files:
    path = os.path.join(job_folder, file)
    reader = PdfReader(path)
    text = "".join([page.extract_text() or "" for page in reader.pages])
    jobs.append({
        "name": os.path.splitext(file)[0],  # e.g., "cv_1"
        "description": text.strip()
    })

In [20]:
jobs[:2]

[{'name': 'JobDescription4',
  'description': "Financial Analyst  \nLocation: Lisbon, Portugal  \nDepartment: Financial Services  \nExperience Level: Staff  \n \nAbout the Role:  \nJoin our Financial Services team as a Financial Analyst. You will support \nfinancial planning and analysis activities, providing insights to drive \nbusiness performance.  \n \nKey Responsibilities:  \n \nAssist in the preparation of financial reports and forecasts.  \nAnalyze financial data to identify trends and variances.  \nCollaborate with business units to support decision -making. \nEnsure compliance with financial regulations and standards.  \nSupport ad -hoc financial analysis and projects.  \nRequirements:  \n \nBachelor's degree in Finance, Accounting, or related field.  \n0-2 years of experience in financial analysis or related roles.  \nStrong analytical and quantitative skills.  \nProficiency in Excel and financial modeling.  \nFluency in English and Portuguese required."},
 {'name': 'JobDescr

## Create data

In [23]:
results = []

for job in [jobs[0]]:
    for candidate in [cvs[0]]:
        response = chain.invoke({"job_description": job, "cv": candidate["cv"]})
        results.append({
            "job_description": job,
            "candidate_name": candidate["name"],
            "score": response.strip()  # You may want to cast to int
        })

# Example: print or save
for res in results:
    print(f"{res['candidate_name']} scored {res['score']} for job: {res['job_description']}...")

cv_47 scored Here's an evaluation of the CV based on the job description:

**Strengths:**

*   **Location:** The candidate is located in Porto, Portugal, which is geographically close to Lisbon.
*   **Language:** The candidate is fluent in both Portuguese and English, which is a requirement.
*   **Skills:** The CV mentions healthcare analytics, which demonstrates analytical skills.
*   **Education:** The candidate has a relevant bachelor's degree.

**Weaknesses:**

*   **Experience:** The candidate's experience is primarily in healthcare, not finance. While analytical skills are transferable, the job description specifically seeks financial analysis experience.
*   **Industry Mismatch:** The candidate's background is in healthcare, while the job is in financial services.
*   **Skills Mismatch:** The CV does not mention proficiency in Excel or financial modeling, which are key requirements.
*   **Experience Level:** The candidate has 5 years of experience, which is more than the 0-2 yea