### Parsing Job Description

In [244]:
import json
import os

from langchain.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI

def parse_job_description(job_description):
    """Ask Gemini to extract structured requirements from a job description.

    Args:
        job_description: Full text of the job posting.

    Returns:
        Whatever ``chain.invoke`` returns for the prompt. The caller in this
        notebook treats it as text and JSON-decodes it, so it is presumably a
        string holding a JSON object -- confirm against the installed
        langchain-google-genai version.
    """
    llm = GoogleGenerativeAI(
        # Was misspelled "temprature", so the intended temperature was never set.
        temperature=0,
        model="gemini-pro",
        # Prefer the key from the environment; the original placeholder is kept
        # as the fallback so behaviour is unchanged when the variable is unset.
        google_api_key=os.environ.get("GOOGLE_API_KEY", "APNI API KEY DAAL"),
    )
    prompt = PromptTemplate(
        input_variables=["text"],
        # "achievements" was misspelled in the original prompt, which left the
        # spelling of the output key up to the model.
        template=(
            "Extract education, achievements, certifications, languages, projects, "
            "skills, and work experience from the following job description:\n{text}. "
            "The output should be in JSON format with the keys mentioned before. "
            "The value of all the keys will be lists containing the respective "
            "information from the job description."
        ),
    )

    chain = prompt | llm
    return chain.invoke(input={"text": job_description})

# Read the raw job-description text from disk.
with open(r"S:\resume_parsing\job_descriptions\Prof.-CS-Sitare-University.txt", 'r') as file:
    job_description = file.read()

# Decode only the outermost {...} object instead of slicing fixed offsets
# ([8:-4]), so parsing works whether or not the reply is wrapped in extra text
# such as a Markdown code fence. If no object is present, json.loads still raises.
raw_job_desc = parse_job_description(job_description)
job_desc = json.loads(raw_job_desc[raw_job_desc.find("{"):raw_job_desc.rfind("}") + 1])
job_desc

{'education': ['M.Tech./M.S. or Ph.D. in Computer Science or a closely related discipline from a reputed institute or university.'],
 'achievements': [],
 'certifications': [],
 'languages': [],
 'projects': [],
 'skills': ['Machine Learning', 'Systems', 'Security', 'HCI'],
 'work_experience': ['Over 2 years of teaching or industry experience in a relevant field.']}

### Resumes from DB

In [147]:
import ast
from langchain.sql_database import SQLDatabase

def get_resumes_from_db(db_connection_str, query, info, field_name):
    """Run ``query`` and merge its tagged columns into ``info`` under ``field_name``.

    Each result row is expected to look like ``(record_id, 'tag-->value', ...)``:
    the first column identifies the resume and every further column is either
    ``None`` or a string of the form ``tag-->value``.

    Args:
        db_connection_str: Database URI handed to ``SQLDatabase.from_uri``.
        query: SQL text to execute.
        info: Dict keyed by record id; it is updated in place.
        field_name: Key under which this query's details are stored per record.

    Returns:
        The same ``info`` dict, where ``info[record_id][field_name]`` is a list
        of single-entry ``{tag: value}`` dicts in column order.

    Raises:
        IndexError: If a non-null column does not contain the ``-->`` separator.
    """
    db = SQLDatabase.from_uri(db_connection_str)
    raw_rows = db.run(query, fetch='all')
    # The result arrives as text; a falsy result (presumably what comes back
    # when no rows match -- confirm) has no literal to evaluate.
    rows = ast.literal_eval(raw_rows) if raw_rows else []

    for row in rows:
        record_id = row[0]
        details = []
        for col in row[1:]:
            # NULL columns are skipped. The original crashed when the first
            # tagged column was NULL, because only later columns were guarded.
            if col is None:
                continue
            # Split on the first separator only, so a value that itself contains
            # '-->' is kept whole instead of being truncated.
            parts = col.split('-->', 1)
            details.append({parts[0]: parts[1]})

        info.setdefault(record_id, {})[field_name] = details

    return info

# Define the connection string here so this cell runs on a fresh kernel; it was
# previously only assigned further down the notebook (same value).
db_connection_str = "mysql://root:@127.0.0.1/resume"

# One row per resume: each education column is collapsed into a single
# comma-separated string prefixed with a 'tag-->' marker.
query = '''
    SELECT 
        personal_information_id,
        CONCAT('degree_courses-->', GROUP_CONCAT(degree_course ORDER BY id)) AS degree_courses,
        CONCAT('fields_of_study-->', GROUP_CONCAT(field_of_study ORDER BY id)) AS fields_of_study,
        CONCAT('institutes-->', GROUP_CONCAT(institute ORDER BY id)) AS institutes,
        CONCAT('marks_percentages_gpas-->', GROUP_CONCAT(marks_percentage_gpa ORDER BY id)) AS marks_percentages_gpas
    FROM 
        education_details
    GROUP BY 
        personal_information_id;
    '''

info = dict()
get_resumes_from_db(db_connection_str, query, info, "Education Details")

{1: {'Education Details': [{'degree_courses': "BCA - Bachelor's in Computer Application,HIGHER SECONDARY,SSLC"},
   {'fields_of_study': 'None,None,None'},
   {'institutes': 'Islamiah College (Autonomous) - ThiruvalluvarUniversity,Bethel Matric Hr Sec School,Bethel Matric Hr Sec School'},
   {'marks_percentages_gpas': '7.5 CGPA,50%,76.5%'}]},
 2: {'Education Details': [{'degree_courses': 'Master of Science - Artificial Intelligence & Machine Learning,Bachelor of Science - Computer Science,Class 12 - WBCHSE,Class 10 - WBBSE'},
   {'fields_of_study': 'None,None,None,None'},
   {'institutes': 'Christ (Deemed to be University), Bengaluru, India,St. Xavier’s College (Autonomous), Kolkata, India,Patha Bhavan High School, Kolkata, India,Patha Bhavan High School, Kolkata, India'},
   {'marks_percentages_gpas': '3.97 / 4 (GPA),82.4%,94.4%,91%'}]}}

In [159]:
# One entry per resume section:
#   (section label, table, grouping column, ((tag/alias, source column), ...))
# The tag is both the 'tag-->' prefix and the SQL alias of the aggregated column.
_RESUME_SECTIONS = (
    ("Personal Information", "personal_information", "id", (
        ("name", "name"),
        ("email", "email"),
        ("phone_number", "phone_number"),
        ("address", "address"),
        ("linkedin_url", "linkedin_url"),
    )),
    ("Education Details", "education_details", "personal_information_id", (
        ("degree_courses", "degree_course"),
        ("fields_of_study", "field_of_study"),
        ("institutes", "institute"),
        ("marks_percentages_gpas", "marks_percentage_gpa"),
    )),
    ("Certifications", "certification_details", "personal_information_id", (
        ("certification_title", "certification_title"),
        ("date_of_issue", "date_of_issue"),
        ("issuing_organization", "issuing_organization"),
    )),
    ("Achievements", "achievements", "personal_information_id", (
        ("achievement_description", "achievement_description"),
    )),
    ("Languages", "language_competencies", "personal_information_id", (
        ("language", "language"),
        ("proficiency_level", "proficiency_level"),
    )),
    ("Projects", "project_details", "personal_information_id", (
        ("project_name", "project_name"),
        ("description", "description"),
    )),
    ("Skills", "skills", "personal_information_id", (
        ("skill", "skill"),
    )),
    ("Work Experience", "work_experience", "personal_information_id", (
        ("job_title", "job_title"),
        ("company_name", "company_name"),
        ("description", "description"),
    )),
)


def _grouped_concat_query(table, id_column, columns):
    """Build the per-resume 'tag-->v1,v2,...' aggregation query for one table."""
    select_items = [id_column] + [
        f"CONCAT('{tag}-->', GROUP_CONCAT({column} ORDER BY id)) AS {tag}"
        for tag, column in columns
    ]
    select_clause = ",\n        ".join(select_items)
    return f"""
    SELECT 
        {select_clause}
    FROM 
        {table}
    GROUP BY 
        {id_column};
    """


def get_resume_info(db_connection_str="mysql://root:@127.0.0.1/resume"):
    """Collect every resume section from the database into one nested dict.

    Runs one aggregation query per section (personal information, education,
    certifications, achievements, languages, projects, skills, work experience)
    and merges the results through ``get_resumes_from_db``.

    Args:
        db_connection_str: Database URI; defaults to the local ``resume``
            database that was previously hard-coded in this function.

    Returns:
        The dict built up by ``get_resumes_from_db`` across all sections.
    """
    info = dict()
    for section_label, table, id_column, columns in _RESUME_SECTIONS:
        section_query = _grouped_concat_query(table, id_column, columns)
        info = get_resumes_from_db(db_connection_str, section_query, info, section_label)
    return info

# Build the combined per-resume dictionary once; later cells read `resume_info`.
resume_info = get_resume_info()
# NOTE: displaying this renders personal details (names, emails, phone numbers)
# in the cell output -- clear or redact the output before sharing the notebook.
resume_info

{1: {'Personal Information': [{'name': 'Viges D'},
   {'email': 'dhananjeyanvigesh@gmail.com'},
   {'phone_number': '8778403686'},
   {'address': 'Marathalli, Kundalahalli Gate, Bengalore, India'},
   {'linkedin_url': 'https://linkedin.com/in/vigesh-d-329715272'}],
  'Education Details': [{'degree_courses': "BCA - Bachelor's in Computer Application,HIGHER SECONDARY,SSLC"},
   {'fields_of_study': 'None,None,None'},
   {'institutes': 'Islamiah College (Autonomous) - ThiruvalluvarUniversity,Bethel Matric Hr Sec School,Bethel Matric Hr Sec School'},
   {'marks_percentages_gpas': '7.5 CGPA,50%,76.5%'}],
  'Certifications': [{'certification_title': 'HTML5 and CSS3 basic to advance course'},
   {'issuing_organization': 'Udemy'}],
  'Languages': [{'language': 'English,Tamil'},
   {'proficiency_level': 'Professional Working Proﬁciency,Native or Bilingual Proﬁciency'}],
  'Projects': [{'project_name': 'Static WebPage,Animated WebPage,Landing and Animated Web Page'},
   {'description': 'Created a

### Scoring Resumes

In [10]:
def score_resumes(resumes, responsibilities, qualifications):
    """Score each resume by case-insensitive phrase matching.

    A resume earns one point for every responsibility and every qualification
    that occurs as a substring of its ``'description'`` text.

    Args:
        resumes: Iterable of dicts with ``'id'`` and ``'description'`` keys.
        responsibilities: Phrases to look for.
        qualifications: Further phrases to look for.

    Returns:
        A list of ``{"resume_id": ..., "score": ...}`` dicts, one per resume,
        in input order.
    """
    results = []
    for candidate in resumes:
        # The description is looked up per phrase, so a resume is only required
        # to have one when there is at least one phrase to check.
        responsibility_hits = sum(
            1
            for phrase in responsibilities
            if phrase.lower() in candidate['description'].lower()
        )
        qualification_hits = sum(
            1
            for phrase in qualifications
            if phrase.lower() in candidate['description'].lower()
        )
        results.append({
            "resume_id": candidate["id"],
            "score": responsibility_hits + qualification_hits,
        })
    return results

### Integration

In [None]:
from langchain.agents import initialize_agent, Tool
from langchain.tools.base import Tool
from langchain.chains import LLMChain

class JobDescriptionTool(Tool):
    """Tool wrapper that forwards a job description to ``parse_job_description``."""
    # NOTE(review): the description below mentions responsibilities and
    # qualifications, but the prompt inside parse_job_description asks for
    # education, achievements, certifications, languages, projects, skills and
    # work experience -- confirm which is intended.
    name = "job_description_tool"
    description = "Parses job descriptions into responsibilities and qualifications"
    # NOTE(review): confirm that the Tool base class actually dispatches to a
    # method named `_call`; nothing in this notebook invokes it directly.
    def _call(self, inputs):
        """Parse ``inputs["job_description"]`` and return the parser's result."""
        job_description = inputs["job_description"]
        return parse_job_description(job_description)

class ResumeDBTool(Tool):
    """Tool wrapper intended to fetch resumes via ``get_resumes_from_db``."""
    name = "resume_db_tool"
    description = "Retrieves resumes from the database"
    # NOTE(review): confirm that the Tool base class actually dispatches to a
    # method named `_call`; nothing in this notebook invokes it directly.
    def _call(self, inputs):
        """Look up resumes using ``inputs["db_connection_str"]``."""
        db_connection_str = inputs["db_connection_str"]
        # NOTE(review): get_resumes_from_db is defined in this notebook with four
        # required parameters (db_connection_str, query, info, field_name); only
        # one is passed here, so this call would raise TypeError as written.
        return get_resumes_from_db(db_connection_str)

# Instantiate both tool wrappers so they can be handed to the agent.
tools = [JobDescriptionTool(), ResumeDBTool()]

# NOTE(review): no `llm` is supplied and the agent kind is passed as
# `agent_type="zero_shot"`; check both against the initialize_agent signature
# of the installed LangChain version (this cell has no execution count).
agent = initialize_agent(tools=tools, agent_type="zero_shot")

# Main function to score resumes
def main(job_description_path, db_connection_str):
    """Score the stored resumes against the job description in a text file.

    Args:
        job_description_path: Path of the text file holding the job description.
        db_connection_str: Database URI forwarded to the resume tool.

    Returns:
        The list produced by ``score_resumes``.
    """
    with open(job_description_path, 'r') as handle:
        jd_text = handle.read()

    # Ask the agent to parse the job description.
    jd_request = {"job_description_tool": {"job_description": jd_text}}
    parsed = agent(jd_request)

    # NOTE(review): parse_job_description's prompt asks for keys such as
    # education/skills, not 'responsibilities'/'qualifications' -- confirm
    # that the agent's result really carries these two keys.
    responsibilities, qualifications = parsed['responsibilities'], parsed['qualifications']

    # Ask the agent for the resumes, then score them.
    resume_request = {"resume_db_tool": {"db_connection_str": db_connection_str}}
    resumes = agent(resume_request)

    return score_resumes(resumes, responsibilities, qualifications)

# Run the integration pipeline end to end and print the resulting scores.
# NOTE(review): ".txt" looks like a placeholder -- point this at a real
# job-description file before running.
job_description_path = ".txt"
# MySQL URI: user `root`, empty password, local host, `resume` database.
db_connection_str = "mysql://root:@127.0.0.1/resume"
scores = main(job_description_path, db_connection_str)
print(scores)

### Metric-In-Prompt

In [253]:
# Scoring rubric sent to the model. It has two placeholders, {job_description}
# and {resume}, and instructs the model to reply with a single number only.
prompt_template = """
You are a LangChain agent tasked with scoring a resume based on a given job description. The resume is provided as a JSON string, and the job description is provided as a regular string. The scoring criteria are divided into three categories: Keyword Matching (40%), Experience Alignment (30%), and Skill Matching (30%). Follow the steps below to calculate the final score out of 100. Ensure the scoring process is deterministic and the logic is clear.

Step 1: Keyword Matching (40%)
- Identify keywords from the job description.
- Count the occurrences of these keywords in the resume.
- Calculate the keyword matching score as follows:
keyword_matching_score = (number_of_matched_keywords / total_number_of_keywords) * 40

Step 2: Experience Alignment (30%)
- Extract relevant experience details from the resume.
- Compare the candidate's experience with the job description requirements.
- Assign a score based on how well the experience aligns with the job description:
- Perfect alignment: 30
- Good alignment: 20-29
- Moderate alignment: 10-19
- Poor alignment: 0-9

Step 3: Skill Matching (30%)
- Identify required skills from the job description.
- Check if these skills are present in the resume.
- Calculate the skill matching score as follows:
skill_matching_score = (number_of_matched_skills / total_number_of_skills) * 30

Step 4: Calculate Final Score
- Combine the scores from each category to get the final score out of 100:
final_score = keyword_matching_score + experience_alignment_score + skill_matching_score

Step 5: Output
- Return the final score as a real number.

Do not output any reasoning. The output will be only a real number.

Follow the above steps to give an output for the following input,
Job Description: {job_description}
Resume JSON: {resume}
"""

# Bind the two placeholders used in the template above.
prompt = PromptTemplate(template=prompt_template, input_variables=["job_description", "resume"])

# Gemini client used for scoring, configured for repeatable output.
llm = GoogleGenerativeAI(
    # Was misspelled "temprature", so the intended temperature was never set.
    temperature=0,
    top_p=1,
    seed=42,
    model="gemini-pro",
    # Prefer the key from the environment; the original placeholder is kept as
    # the fallback so behaviour is unchanged when the variable is unset.
    google_api_key=os.environ.get("GOOGLE_API_KEY", "APNI API KEY DAAL"),
)

# Pipe the scoring prompt into the model.
chain = prompt | llm

# Score every resume against the parsed job description.
scored_resumes = []
for resume_id, resume in resume_info.items():
    # The prompt states that the resume arrives as a JSON string, so serialize
    # it instead of letting the dict's Python repr be interpolated.
    resume_json = json.dumps(resume, ensure_ascii=False, default=str)
    score = chain.invoke(input={"job_description": job_desc, "resume": resume_json})
    scored_resumes.append((resume_id, score))

for resume_id, score in scored_resumes:
    print(f"Resume: {resume_id}\nScore: {score}\n")

Resume: 1
Score: 75.5

Resume: 2
Score: 91.2

