### Generate relative scores

In [4]:
import os
from functions import text_extraction
# Project root is the parent of the notebook's working directory.
# os.path.dirname is separator-agnostic, unlike splitting the cwd on "/".
root = os.path.dirname(os.getcwd())
resumes_path = os.path.join(root, 'data', "raw", "cv")

# os.listdir returns entries in arbitrary order; sorting keeps the positional
# index that is baked into every key stable from one run to the next.
resumes_list = sorted(os.listdir(resumes_path))

# Key: the filename segment after the first "-" with its extension dropped,
# underscores turned into spaces, title-cased, plus the file's position in the
# sorted listing. Value: whatever text_extraction returns for that file.
# NOTE(review): later cells index candidates_dict by hex candidate ids such as
# 'c5f45dfe145427e1', which these name-based keys never produce — confirm
# where the id-keyed mapping is built before relying on a fresh-kernel run.
candidates_dict = {
    file.split("-")[1].split(sep = ".")[0].replace("_", " ").title() + f" {index}" : text_extraction(os.path.join(resumes_path, file))
    for index, file in enumerate(resumes_list)
}
candidates_dict

{'Ashwat Resume 0': 'ASHWAT KUMAR CHAMAN\n+971 558078154 | ashwatkumar.ak1@gmail.com | linkedin.com/in/ashwat-kumar-chaman-144650173 | Dubai\n------------------------------------------------------------------------------------------------------------------------------\nEDUCATION\nChandigarh University, Mohali\nMaster of Business Administration, Business Analytics (07/2022 - 05/2024)\nPanjab University, Chandigarh\nBachelor of Arts (07/2018 - 07/2021)\nEXPERIENCE\nInnovative Incentives and Rewards Pvt. Ltd. Mohali\nProject Coordinator (07/2024-04/2025)\n• Led data-driven operations for a premium loyalty program targeting retailers and distributors, enhancing program\nperformance and engagement through real-time data tracking.\n• Designed automated reporting systems using Power BI, improving the analysis of key metrics like participation and\nreward redemption.\n• Utilized SQL and Excel to streamline data pipelines, ensuring accurate tracking of member activities and interactions.\n• Col

In [18]:
import pandas as pd
# Load the processed scores table, indexed by (candidate_id, group_id).
# Path components are passed separately so os.path.join picks the separator.
scores_path = os.path.join(root, "data", "processed", "scores.csv")
scores_df = pd.read_csv(
    scores_path,
    index_col = ['candidate_id', 'group_id'],
    # Candidate ids are hex strings; without an explicit dtype an id made only
    # of digits (or shaped like '12e34') would be inferred as a number and the
    # string-based .loc lookups further down would stop matching.
    dtype = {'candidate_id': str},
)
scores_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 26 entries, ('af62b86c3a326086', np.int64(2)) to ('c5f45dfe145427e1', np.int64(1))
Data columns (total 25 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Data Analytics             26 non-null     int64  
 1   Business Transformation    26 non-null     int64  
 2   Information Technology     26 non-null     int64  
 3   Cloud Computing            26 non-null     int64  
 4   Data Governance            26 non-null     int64  
 5   SQL                        26 non-null     int64  
 6   ETL/ELT Processes          26 non-null     int64  
 7   Data Visualization Tools   26 non-null     int64  
 8   Data Transformation Tools  26 non-null     int64  
 9   Data Modeling              26 non-null     int64  
 10  Communication Skills       26 non-null     int64  
 11  Problem-Solving            26 non-null     int64  
 12  Team Collaboration         26 non-null     in

In [39]:
# Pull the 'Data Analytics' score for the two candidates being compared.
# Selecting by candidate id alone leaves one entry per group_id; the first
# entry is the one used.
# NOTE(review): if a candidate can appear in several groups, "first" may not
# be the group intended — confirm.
candidate_1_score, candidate_2_score = (
    scores_df.loc[candidate_id, 'Data Analytics'].iloc[0]
    for candidate_id in ('c5f45dfe145427e1', 'b5142b9b9676a4f9')
)
candidate_1_score, candidate_2_score


(np.int64(85), np.int64(90))

In [40]:
from langchain_core.prompts import ChatPromptTemplate

# System message: frames the model as an analyst justifying already-assigned
# scores for one criterion, relative to the other candidates in the group.
system_message = (
    "You are a talent acquisition analyst. Your task is to provide a brief rationale for the score given to each candidate "
    "within a specific group for a single evaluation criterion. The rationale should be a concise summary of why "
    "each candidate received their specific score, drawing direct evidence from their resume to justify their ranking "
    "relative to the others in the same group."
)

# Human message: the criterion name followed by id, score and resume text for
# each of the two candidates.
human_message = (
    "Provide a comparative rationale for each candidate in the following group for the '{criteria_element}' criterion."
    "\n\nThe score of the candidate '{candidate_1_id}' is: {candidate_1_score}\n"
    "The candidate resume is the following:\n{candidate_1_resume}\n\n"
    "\n\nThe score of the candidate '{candidate_2_id}' is: {candidate_2_score}\n"
    "The candidate resume is the following:\n{candidate_2_resume}\n\n"
)

rationale_template = [
    ("system", system_message),
    ("human", human_message),
]
rationale_prompt = ChatPromptTemplate.from_messages(messages = rationale_template)

# Render the prompt once with concrete values to eyeball the final messages.
preview_inputs = dict(
    criteria_element = "Data Analytics",
    candidate_1_id = "c5f45dfe145427e1",
    candidate_1_resume = candidates_dict['c5f45dfe145427e1'],
    candidate_1_score = candidate_1_score,
    candidate_2_id = "b5142b9b9676a4f9",
    candidate_2_score = candidate_2_score,
    candidate_2_resume = candidates_dict['b5142b9b9676a4f9'],
)
rationale_prompt.invoke(preview_inputs)

ChatPromptValue(messages=[SystemMessage(content='You are a talent acquisition analyst. Your task is to provide a brief rationale for the score given to each candidate within a specific group for a single evaluation criterion. The rationale should be a concise summary of why each candidate received their specific score, drawing direct evidence from their resume to justify their ranking relative to the others in the same group.', additional_kwargs={}, response_metadata={}), HumanMessage(content="Provide a comparative rationale for each candidate in the following group for the 'Data Analytics' criterion.\n\nThe score of the candidate 'c5f45dfe145427e1' is: 85\nThe candidate resume is the following:\nJUSTIN TRAN\nMelbourne | 0468.365.427 | justintran3103@gmail.com | LinkedIn | GitHub\nPROFILE\nWith hands-on experience in business intelligence and data transformation using SQL, Python, and Power BI, I have proven experience in\noptimizing operations, reducing manual processes by automation,

In [41]:
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from typing import List


# Read variables from a .env file into the environment before the client is
# built.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

# Chat model pinned to a dated snapshot name rather than a floating alias.
llm = ChatOpenAI(model="gpt-4o-mini-2024-07-18")

# Schema for one candidate's entry inside GroupRationales, which is the model
# handed to llm.with_structured_output further down.
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the LLM as part of the schema, so treat them as prompt text and
# confirm before rewording them.
class CandidateRationale(BaseModel):
    """The rationale for a single candidate's score."""
    # Candidate identifier; the prompt supplies these as '{candidate_1_id}' / '{candidate_2_id}'.
    candidate_id: str = Field(..., description="The unique ID of the candidate.")
    # Typed as a list of strings although the description reads like a single
    # explanation; the recorded response holds several sentences per candidate.
    rationale: List[str] = Field(..., description="A concise explanation of the candidate's score in the specified criterion, in comparison to other candidates in the group.")

# Top-level schema passed to llm.with_structured_output: a wrapper around a
# list of CandidateRationale entries.
# NOTE(review): the docstring and Field description are presumably sent to the
# LLM with the schema — confirm before rewording them.
class GroupRationales(BaseModel):
    """A list of rationales for a group of candidates."""
    # One CandidateRationale per candidate; nothing here enforces that every
    # candidate named in the prompt actually appears in the list.
    rationales: List[CandidateRationale] = Field(..., description="A list of rationales for each candidate in the group.")

# Bind the GroupRationales schema to the model so the reply comes back as a
# parsed object, then pipe the prompt template into it.
llm_constrained = llm.with_structured_output(schema = GroupRationales)
rationale_chain = rationale_prompt | llm_constrained

# Same two candidates and criterion as the prompt preview above.
rationale_inputs = dict(
    criteria_element = "Data Analytics",
    candidate_1_id = "c5f45dfe145427e1",
    candidate_1_resume = candidates_dict['c5f45dfe145427e1'],
    candidate_1_score = candidate_1_score,
    candidate_2_id = "b5142b9b9676a4f9",
    candidate_2_score = candidate_2_score,
    candidate_2_resume = candidates_dict['b5142b9b9676a4f9'],
)
response = rationale_chain.invoke(rationale_inputs)
response

GroupRationales(rationales=[CandidateRationale(candidate_id='c5f45dfe145427e1', rationale=["Justin's score of 85 is justified by his extensive hands-on experience with business intelligence, SQL, and Python, as well as practical work optimizing operations and reducing manual processes significantly.", 'His project work with Power BI and practical application of data pipelines demonstrates a solid ability to deliver actionable insights, enhancing inventory management and operational efficiency, placing him above many of his peers.', 'However, while his performance metrics and continuous support to team members show a commendable initiative, his overall impact on quantitative metrics may not surpass other candidates with more direct data science applications.']), CandidateRationale(candidate_id='b5142b9b9676a4f9', rationale=["Abhishek's score of 90 reflects his advanced expertise in data analysis and visualization, particularly with his master's degree in data science and experience util

In [42]:
# Emit one line per candidate: the id followed by the list of rationale strings
# exactly as returned (printed as a Python list).
for reason in response.rationales:
    summary_line = f"candidate id '{reason.candidate_id}': {reason.rationale}"
    print(summary_line)

candidate id 'c5f45dfe145427e1': ["Justin's score of 85 is justified by his extensive hands-on experience with business intelligence, SQL, and Python, as well as practical work optimizing operations and reducing manual processes significantly.", 'His project work with Power BI and practical application of data pipelines demonstrates a solid ability to deliver actionable insights, enhancing inventory management and operational efficiency, placing him above many of his peers.', 'However, while his performance metrics and continuous support to team members show a commendable initiative, his overall impact on quantitative metrics may not surpass other candidates with more direct data science applications.']
candidate id 'b5142b9b9676a4f9': ["Abhishek's score of 90 reflects his advanced expertise in data analysis and visualization, particularly with his master's degree in data science and experience utilizing Azure services and machine learning techniques to generate actionable insights.", 