In [445]:
import json
import re
from typing import Dict, List, Any

from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langgraph.graph import START, END, StateGraph
from langgraph.graph.message import add_messages
from typing_extensions import TypedDict

load_dotenv()

True

### Fetch resumes in a list

In [446]:
import os
from typing import List

def load_resumes_from_directory(directory_path: str = "./resumes") -> List[str]:
    """
    Read every regular file in a directory and return the normalised contents.

    Files are visited in sorted filename order so that the returned list (and
    therefore everything derived from it) is the same on every run;
    ``os.listdir`` alone gives no ordering guarantee. Each file's text is
    lower-cased and every occurrence of 'linux' is replaced with 'unix'.

    Args:
        directory_path (str, optional): The path to the directory containing the resume files.
            Defaults to "./resumes".

    Returns:
        List[str]: One normalised string per readable file. Sub-directories are
                   skipped and unreadable files are reported and skipped.
                   Returns an empty list if the path is not an existing
                   directory, is empty, or cannot be listed.
    """
    resumes: List[str] = []
    try:
        # isdir (not exists) so that a plain file passed by mistake is reported clearly
        if not os.path.isdir(directory_path):
            print(f"Error: Directory not found at {directory_path}")
            return []

        # Sorted for a deterministic resume order across runs and platforms
        for filename in sorted(os.listdir(directory_path)):
            filepath = os.path.join(directory_path, filename)

            # Only regular files are resumes; skip sub-directories
            if not os.path.isfile(filepath):
                continue

            try:
                with open(filepath, "r", encoding="utf-8") as file:
                    resume_content = file.read()
            except Exception as e:
                # Best effort: report the unreadable file and carry on with the rest
                print(f"Error reading file {filename}: {e}")
                continue

            resumes.append(resume_content.lower().replace('linux', 'unix'))
    except Exception as e:
        print(f"An error occurred: {e}")
        return []

    return resumes

#### Define job description

In [447]:
# Job description supplied to the graph as state['job_description'];
# extract_criteria forwards this text to the model prompt to derive the
# screening criteria.
job_description = """
Job Title: Python Developer
Location: Chennai, Tamil Nadu, India
Company: ABC Private Limited
Job Summary:
We are seeking a talented and passionate Python Developer to join our dynamic team in Chennai. 
You will be responsible for designing, developing, and deploying scalable and efficient software 
solutions using Python. This role offers an exciting opportunity to work on challenging projects, 
collaborate with experienced engineers, and contribute to the growth and innovation of our 
products/services.

Required Skills and Experience:

Primary Skill: Proven experience as a Python Developer with a strong understanding of Python fundamentals and best practices.
Years of Experience: 3 to 8 years of professional software development experience.
Proficiency in working with Shell scripting and unix commands for automation tasks.

Nice to Have: 
Familiarity with Java
Experience with one or more Python web frameworks such as Django, Flask, or FastAPI.
Experience with cloud platforms such as AWS, Azure, or Google Cloud Platform (GCP).

Educational Requirements:
Essential: 
Bachelor of Engineering (B.E.) or Bachelor of Technology (B.Tech.) degree in Computer Science, 
Information Technology, Electronics and Communication Engineering, or a closely 
related engineering discipline from a recognized institution. 
Candidates with equivalent engineering degrees will also be considered.

Desirable: 
Master of Engineering (M.E.) or Master of Technology (M.Tech.) degree 
in a relevant engineering specialization.

Other requirements:
Experience with unit testing, integration testing, and other testing methodologies.
Familiarity with Agile development methodologies.
Excellent problem-solving and analytical skills.
Strong communication and collaboration skills.   

Not Considered: 
Degrees in Bachelor of Science (B.Sc.), Bachelor of Arts (B.A.), 
Master of Science (M.Sc.), Master of Arts (M.A.), Doctor of Philosophy (Ph.D.), 
or any other non-engineering degrees will not be considered for this role.

Salary criteria:
Competitive salary and benefits package that includes 30% to 50% of pay hike 
from the current pay scale based on the performance in interview.
upto 3 years - Upto 15 lakh per annum
4 years - Upto 18 lakh per annum
5 years - Upto 21 lakh per annum
6 years - Upto 25 lakh per annum
7 years - Upto 30 lakh per annum
8 years and more - Upto 35 lakh per annum"""

#### Let's parse the resumes into structured data

In [448]:
def preprocess_resumes(state: "State") -> "State":
    """Parse the raw resume texts into structured candidate records using the Gemini model.

    The annotations are quoted on purpose: this cell appears before the cell
    that defines ``State``, and an unquoted annotation makes the ``def`` itself
    raise ``NameError`` on a fresh kernel (Restart & Run All).

    Args:
        state: Graph state; only ``state['resume_text_list']`` is read.

    Returns:
        A copy of ``state`` with ``resumes_cleaned`` set to a list of candidate
        dictionaries. The list is empty when there are no resumes, in which
        case the model is not called at all.
    """
    resume_text_list = state['resume_text_list']
    if not resume_text_list:
        # Nothing to parse: avoid sending an empty prompt to the model
        print('LOG: No resumes to preprocess; skipping the model call.')
        return {**state, "resumes_cleaned": []}

    prompt_template = """
You are an expert at parsing resumes and extracting key information.

Given the following resume text, extract the following information and format it as a JSON object.

Resume Texts:
{resume_texts}

JSON Output Format:
A list of JSON objects, each in the format:
[
  {{
    "name": "Full Name of the candidate",
    "skills": ["List", "of", "technical", "skills"],
    "education": "Highest level of education and degree",
    "experience_years": "Total years of relevant work experience as an integer",
    "work_history": [
      {{"title": "Job Title", "company": "Company Name", "years": "Years as integer", "description": "Brief description"}},
      ...
    ],
    "soft_skills": ["List", "of", "soft", "skills"]
  }},
  ...
]

Return only the list of dictionaries as raw JSON. Do not include any explanation, markdown, or code fencing.
"""
    resume_texts = '\n\n'.join(resume_text_list)
    prompt = ChatPromptTemplate.from_template(prompt_template)

    print('LOG: Using Gemini model to preprocess resumes and extract useful information.')
    # gemini_model is resolved at call time, so it only has to exist when the graph runs
    chain = prompt | gemini_model | JsonOutputParser()

    result = chain.invoke({"resume_texts": resume_texts})

    # The parser returns whatever JSON the model produced; a lone object
    # (likely for a single resume) is wrapped so downstream nodes can always
    # iterate over a list of candidates.
    if isinstance(result, dict):
        result = [result]

    return {**state, "resumes_cleaned": result}


#### Define ChatGPT model

In [449]:
# Chat model handle for 'gpt-4o-mini-2024-07-18'. The only visible reference to
# it in this notebook is a commented-out chain inside extract_criteria.
open_ai_model = init_chat_model('gpt-4o-mini-2024-07-18')

#### Define Gemini model

In [450]:
import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

# Initialize the Gemini 1.5 Flash model.
# This module-level handle is the model used by both preprocess_resumes and
# extract_criteria when they build their prompt | model | parser chains.
gemini_model = ChatGoogleGenerativeAI(model='gemini-1.5-flash')

### Define state

In [451]:
class State(TypedDict):
    """Shared state dictionary passed between the nodes of the screening graph."""
    # Not read or written by any node visible in this notebook.
    name: str
    # Raw job description text; supplied at graph.invoke and read by extract_criteria.
    job_description: str 
    # Criteria dictionary written by extract_criteria and read by score_candidates.
    criteria: Dict[str, Any]
    # Raw resume texts; supplied at graph.invoke and read by preprocess_resumes.
    resume_text_list: List[str]
    # Parsed candidate records written by preprocess_resumes.
    resumes_cleaned: List[Dict[str, Any]]
    # One {candidate_name: {score_name: value}} dict per candidate, written by score_candidates.
    scored_candidates: List[Dict[str, Any]]
    # Same shape as scored_candidates, filtered and ordered by rank_candidates.
    ranked_candidates: List[Dict[str, Any]]

### Extract criteria (Gemini model)

### Define extract_criteria node

In [452]:
def extract_criteria(state: State) -> State:
    """Derive structured screening criteria from the job description using the Gemini model.

    The prompt asks for exactly the keys that score_candidates reads:
    "required_skills", "preferred_skills", "min_experience_years",
    "max_experience_years" and "soft_skills", plus "education_level".

    Args:
        state: Graph state; only ``state['job_description']`` is read.

    Returns:
        A copy of ``state`` with ``criteria`` set to the parsed dictionary.
        If the chain raises, a dictionary containing only an ``"error"``
        message is returned instead.
        NOTE(review): "error" is not a field of State and score_candidates
        indexes state["criteria"] directly, so confirm how the graph is
        expected to behave after a failure here.
    """
    prompt_template = """
You are an expert at understanding job descriptions and extracting key criteria.
Based on the following job description, extract the required skills, preferred skills,
minimum and maximum years of experience, minimum education level, and any mentioned soft skills.

Job Description:
{job_description}

Return the extracted information as a JSON object with exactly the following keys.
Ensure that the required_skills and preferred_skills values are one worded rather than sentences.
"required_skills", "preferred_skills", "min_experience_years", "max_experience_years", "education_level", "soft_skills".

- "required_skills" and "preferred_skills" should be lists of strings.
- "min_experience_years" should be an integer representing the minimum required years of experience (if a range is given, take the lower bound). If not explicitly mentioned, infer if possible or return null.
- "max_experience_years" should be an integer representing the maximum required years of experience (if a range is given, take the upper bound). If not explicitly mentioned, infer if possible or return null.
- "education_level" should be a string representing the minimum required education level (e.g., "Bachelor", "Master", "None"). If not explicitly mentioned, infer if possible or return null.
- "soft_skills" should be a list of strings. If no soft skills are explicitly mentioned, return an empty list.

Return only the dictionary as raw JSON. Do not include any explanation, markdown, or code fencing.
"""
    prompt = ChatPromptTemplate.from_template(prompt_template)
    job_description = state['job_description']

    # chain
    # chain = prompt | open_ai_model | JsonOutputParser()
    chain = prompt | gemini_model | JsonOutputParser()

    print('LOG: Using Gemini model to extract criteria from the job description...')

    # Extract criteria and store it in State
    try:
        criteria = chain.invoke({'job_description': job_description})
        return {**state, 'criteria': criteria}
    except Exception as e:
        return {"error": f"Error extracting criteria: {e}"}



#### Score candidates

In [453]:

def score_skills(required_skills: List[str], candidate_skills: List[str]) -> float:
    """Return the fraction of required skills that the candidate has.

    Matching is case-insensitive and ignores surrounding whitespace. Duplicate
    entries on either side are counted once, so the score is always within
    [0.0, 1.0].

    Args:
        required_skills: Skills to look for. ``None`` or an empty list is allowed.
        candidate_skills: Skills listed by the candidate. ``None`` or an empty
            list is allowed.

    Returns:
        Matched distinct required skills divided by the number of distinct
        required skills; 0.0 when there is nothing to match against (this
        previously raised ZeroDivisionError).
    """
    required = [skill.strip().lower() for skill in (required_skills or [])]
    candidate = [skill.strip().lower() for skill in (candidate_skills or [])]
    print(f'Log: Required skills: {required}')
    print(f'Log: Candidate skills: {candidate}')

    required_set = set(required)
    if not required_set:
        # e.g. the criteria listed no soft skills: nothing to score
        print('Log: No required skills to match against')
        print('Log: Score: 0.0')
        return 0.0

    total_score = 0.0
    # dict.fromkeys de-duplicates while keeping the candidate's original order for the log
    for candidate_skill in dict.fromkeys(candidate):
        if candidate_skill in required_set:
            total_score += 1
            print(f"Log: Candidate's {candidate_skill} matches with required skills")

    score = total_score / len(required_set)
    print(f'Log: Number of skills met: {total_score}')
    print(f'Log: Score: {score}')

    return score

In [454]:
def score_education(candidate_education) -> float:
    """Score a candidate's degree description against the engineering-only policy.

    Args:
        candidate_education: Free-text degree description. ``None`` or an empty
            string is treated as an ineligible degree.

    Returns:
        -1.0 for an ineligible degree (an arts degree, or a description that
             mentions neither 'engineer' nor 'technology');
         1.0 for a master's degree whose description mentions 'computer' or 'technology';
         0.75 for any other degree whose description mentions 'computer' or 'technology';
         0.5 for the remaining engineering degrees.
    """
    candidate_education = (candidate_education or '').lower()

    print(f'LOG: Candidate has degree: "{candidate_education}"')

    # Match 'art'/'arts' as whole words only, so that words such as 'smart' or
    # 'department' do not reject an otherwise eligible degree.
    has_arts_degree = re.search(r'\barts?\b', candidate_education) is not None
    is_engineering = 'engineer' in candidate_education or 'technology' in candidate_education

    if has_arts_degree or not is_engineering:
        print('LOG: Candidate has in-eligible degree. Rejecting the profile ...')
        return -1.0

    if 'computer' in candidate_education or 'technology' in candidate_education:
        # 100% score for computer science master graduates
        if 'master' in candidate_education:
            print('LOG: Candidate has masters in computer science graduation. Providing 100% score')
            return 1.0

        # 75% score for computer science bachelor graduates
        print('LOG: Candidate has bachelors in computer science graduation. Providing 75% score')
        return 0.75

    # 50% score for non computer science graduates
    print('LOG: Candidate has non-computer science graduation. Providing 50% score')
    return 0.5

In [455]:
def score_experience(expected_start_exp, expected_end_exp, candidate_exp):
    """Score a candidate's years of experience against the expected range.

    All three values may arrive from model-generated JSON, so numeric strings
    are accepted and missing bounds are tolerated.

    Args:
        expected_start_exp: Lower bound in years; ``None`` means no lower bound (0).
        expected_end_exp: Upper bound in years; ``None`` means no upper bound.
        candidate_exp: Candidate's years of experience (number or numeric string).

    Returns:
        1    when the experience lies within [start, end];
        0.7  when it is up to 1 year below the start or up to 2 years above the end;
        0.25 otherwise, including when ``candidate_exp`` is missing or not numeric.
    """
    try:
        years = float(candidate_exp)
    except (TypeError, ValueError):
        print(f'LOG: Candidate experience: "{candidate_exp}" is not a number. Providing 25% score')
        return 0.25

    lower = 0.0 if expected_start_exp is None else float(expected_start_exp)
    upper = float('inf') if expected_end_exp is None else float(expected_end_exp)

    if lower <= years <= upper:
        print(f'LOG: Candidate experience: "{candidate_exp}" aligns with the experience range. Providing 100% score')
        return 1

    # Plain comparisons (rather than `in range(...)`) so fractional years such as 2.5 are handled
    if lower - 1 <= years < lower or upper < years <= upper + 2:
        print(f'LOG: Candidate experience: "{candidate_exp}" is near the experience range. Providing 70% score')
        return 0.7

    print(f'LOG: Candidate experience: "{candidate_exp}" does not align with the experience range. Providing 25% score')
    return 0.25

In [456]:
def score_candidates(state: State) -> State:
    """Score every parsed resume against the extracted job criteria.

    Reads ``state["criteria"]`` and ``state["resumes_cleaned"]`` and returns a
    copy of ``state`` whose ``scored_candidates`` is a list with one
    ``{candidate_name: {score_name: value}}`` dictionary per resume.
    """
    # Pull the job-side criteria out once, before looking at any resume
    job_criteria = state["criteria"]
    wanted_required = job_criteria["required_skills"]
    wanted_preferred = job_criteria["preferred_skills"]
    exp_floor = job_criteria["min_experience_years"]
    exp_ceiling = job_criteria["max_experience_years"]
    wanted_soft = job_criteria["soft_skills"]

    parsed_resumes = state["resumes_cleaned"]
    results: List[Dict[str, Any]] = []

    resume_fields = ("name", "skills", "experience_years", "education", "soft_skills")
    for resume in parsed_resumes:
        # Read all candidate fields up front, in a fixed order
        person, tech_skills, years, degree, people_skills = (
            resume[field] for field in resume_fields
        )

        # Dict entries are evaluated top to bottom, so the scorers run in this order
        breakdown = {
            'required_skills_score': score_skills(wanted_required, tech_skills),
            'preferred_skills_score': score_skills(wanted_preferred, tech_skills),
            'soft_skills_score': score_skills(wanted_soft, people_skills),
            'exp_score': score_experience(exp_floor, exp_ceiling, years),
            'education_score': score_education(degree),
        }
        results.append({person: breakdown})

    return {**state, 'scored_candidates': results}
    

#### Rank candidates

In [457]:
def rank_candidates(state: State) -> State:
    """Drop ineligible candidates, total the remaining scores and order them best-first.

    A candidate whose ``education_score`` equals -1 is excluded. For every
    other candidate a ``total_score`` entry is added to its score dictionary
    (in place), and the result is stored under ``ranked_candidates`` as a list
    of ``{name: scores}`` dictionaries sorted by ``total_score`` descending.
    """
    score_fields = (
        'required_skills_score',
        'preferred_skills_score',
        'soft_skills_score',
        'exp_score',
        'education_score',
    )

    eligible = []
    for entry in state['scored_candidates']:
        for candidate, breakdown in entry.items():
            # -1 marks a rejected education profile; such candidates are not ranked
            if breakdown.get('education_score') != -1:
                breakdown['total_score'] = sum(
                    breakdown.get(field, 0) for field in score_fields
                )
                eligible.append((candidate, breakdown))

    # Highest total first; ties keep their original relative order
    ordering = sorted(eligible, key=lambda pair: pair[1]['total_score'], reverse=True)

    leaderboard = [{candidate: breakdown} for candidate, breakdown in ordering]
    return {**state, 'ranked_candidates': leaderboard}

#### Present candidate info

In [458]:
from tabulate import tabulate

def display_ranked_candidates(state: State) -> State:
    """Print the full state as JSON, then a grid table of the ranked candidates.

    The table shows the required-skills, education and experience scores plus
    a total over all five score components, ordered by that total (as
    displayed, to two decimals) from highest to lowest. The state is returned
    unchanged.
    """
    print(f'**** state ****\n{json.dumps(state, indent=4)}')

    def build_row(name, scores):
        # Collect the five components once; missing ones count as 0
        required = scores.get('required_skills_score', 0)
        preferred = scores.get('preferred_skills_score', 0)
        soft = scores.get('soft_skills_score', 0)
        experience = scores.get('exp_score', 0)
        education = scores.get('education_score', 0)
        total_score = sum([required, preferred, soft, experience, education])
        return {
            "Candidate": name.title(),
            "Skills Score": f"{required:.2f}",
            "Education Score": f"{education:.2f}",
            "Experience Score": f"{experience:.2f}",
            "Total Score": f"{total_score:.2f}"
        }

    rows = [
        build_row(name, scores)
        for entry in state['ranked_candidates']
        for name, scores in entry.items()
    ]

    # Order by the displayed (two-decimal) total, best first
    rows.sort(key=lambda row: float(row['Total Score']), reverse=True)

    print(tabulate(rows, headers="keys", tablefmt="grid"))

    return state  # ensures compatibility with LangChain workflows


In [459]:
# Assemble the workflow as a straight line:
# START -> extract_criteria -> preprocess_resumes -> score_candidates
#       -> rank_candidates -> display_ranked_candidates -> END
graph_builder = StateGraph(State)

_pipeline_steps = (
    extract_criteria,
    preprocess_resumes,
    score_candidates,
    rank_candidates,
    display_ranked_candidates,
)

# Each node is registered under its function's own name
for _step in _pipeline_steps:
    graph_builder.add_node(_step.__name__, _step)

# Connect consecutive stops along the route, including the START/END sentinels
_route = [START, *(_step.__name__ for _step in _pipeline_steps), END]
for _source, _target in zip(_route, _route[1:]):
    graph_builder.add_edge(_source, _target)

graph = graph_builder.compile()


In [460]:
# Load every resume file found in ./resumes as normalised text.
resume_text_list = load_resumes_from_directory('./resumes')

print(f'LOG: No. of resumes detected: {len(resume_text_list)}')

# Run the compiled graph. Only the two input fields are supplied here; the
# remaining State fields are filled in by the nodes as the graph executes.
# Being the last expression of the cell, the returned state is also echoed
# as the cell output.
graph.invoke(
    {
        'job_description': job_description,
        'resume_text_list': resume_text_list
    }
)

LOG: No. of resumes detected: 5
LOG: Using OpenAI model to parse resumes...
LOG: Using Gemini model to preprocess resumes and extract useful information.
Log: Required skills: ['python', 'shell', 'scripting', 'testing']
Log: Candidate skills: ['c++', 'embedded systems', 'unix', 'bash', 'python', 'git']
Log: Candidate's python matches with required skills
Log: Number of skills met: 1.0
Log: Score: 0.25
Log: Required skills: ['java', 'django', 'flask', 'fastapi', 'aws', 'azure', 'gcp']
Log: Candidate skills: ['c++', 'embedded systems', 'unix', 'bash', 'python', 'git']
Log: Number of skills met: 0.0
Log: Score: 0.0
Log: Required skills: ['problem-solving', 'analytical', 'communication', 'collaboration']
Log: Candidate skills: []
Log: Number of skills met: 0.0
Log: Score: 0.0
LOG: Candidate experience: "7" aligns with the experience range. Providing 100% score
LOG: Candidate has degree: "master of science in electrical engineering"
LOG: Candidate has non-computer science graduation. Provid

{'job_description': '\nJob Title: Python Developer\nLocation: Chennai, Tamil Nadu, India\nCompany: ABC Private Limited\nJob Summary:\nWe are seeking a talented and passionate Python Developer to join our dynamic team in Chennai. \nYou will be responsible for designing, developing, and deploying scalable and efficient software \nsolutions using Python. This role offers an exciting opportunity to work on challenging projects, \ncollaborate with experienced engineers, and contribute to the growth and innovation of our \nproducts/services.\n\nRequired Skills and Experience:\n\nPrimary Skill: Proven experience as a Python Developer with a strong understanding of Python fundamentals and best practices.\nYears of Experience: 3 to 8 years of professional software development experience.\nProficiency in working with Shell scripting and unix commands for automation tasks.\n\nNice to Have: \nFamiliarity with Java\nExperience with one or more Python web frameworks such as Django, Flask, or FastAPI

## 