In [3]:
!pip install PyPDF2
import spacy
from collections import defaultdict
from heapq import nlargest
from PyPDF2 import PdfReader
from pathlib import Path
import io



In [4]:
# Load spaCy's small English pipeline once; the functions below that call
# `nlp(...)` (parse_resume, get_job_description) rely on this module-level object.
# NOTE(review): per the spaCy docs the "sm" packages ship without static word
# vectors, so the Doc.similarity score used for ranking may be of limited
# quality — consider an "md"/"lg" model; TODO confirm.
nlp = spacy.load("en_core_web_sm")

In [5]:
def parse_resume(file_path):
    """Read a PDF resume and return its parsed text plus a guessed candidate name.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        A tuple ``(doc, candidate_name)``: ``doc`` is the result of running the
        module-level ``nlp`` pipeline over the extracted text, and
        ``candidate_name`` is whatever ``extract_candidate_name`` derives from
        that same text.
    """
    with open(file_path, "rb") as file:
        pdf_reader = PdfReader(file)
        # Pages are read while the file is still open. `or ""` guards against
        # a falsy extraction result (e.g. a page with no text layer), which
        # would otherwise break the string join below.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    # Join with a newline so the last word of one page and the first word of
    # the next are not fused into a single token.
    resume_text = "\n".join(page_texts)
    candidate_name = extract_candidate_name(resume_text)
    return nlp(resume_text), candidate_name

In [6]:
def extract_candidate_name(resume_text):
    """Return the first run of consecutive title-cased words in the text.

    The text is split on whitespace; words are skipped until one satisfies
    ``str.istitle()``, then collected until the first word that does not.
    The collected words are returned joined by single spaces, or ``""`` when
    no title-cased word exists.
    """
    name_parts = []
    for word in resume_text.split():
        if word.istitle():
            name_parts.append(word)
            continue
        # A non-title word ends the run, but only once the run has started.
        if name_parts:
            break
    return " ".join(name_parts)

In [7]:
def get_job_description():
    """Prompt on stdin for a job description and return it processed by ``nlp``."""
    return nlp(input("Enter job description: "))

In [8]:
def evaluate_candidate(resume, job_description, years_of_experience, location_preference):
    """Score a resume against a job description and hard requirements.

    The score is ``resume.similarity(job_description)`` plus a bonus of 1 when
    the resume both meets the experience threshold and matches the preferred
    location. The metrics can be adjusted as needed.

    Args:
        resume: Parsed resume; must provide ``similarity`` and be accepted by
            ``extract_years_of_experience`` / ``extract_location``.
        job_description: Parsed job description passed to ``resume.similarity``.
        years_of_experience: Minimum number of years required for the bonus.
        location_preference: Desired location; compared case-insensitively and
            ignoring surrounding whitespace.

    Returns:
        The numeric score (similarity, optionally increased by 1).
    """
    score = resume.similarity(job_description)

    years_exp = extract_years_of_experience(resume)
    location = extract_location(resume)

    # Normalise both sides so "Remote", " remote" and "REMOTE" are treated as
    # the same place; an exact `==` silently dropped the bonus on such input.
    found_location = (location or "").strip().casefold()
    wanted_location = (location_preference or "").strip().casefold()

    if years_exp >= years_of_experience and found_location == wanted_location:
        score += 1
    return score

In [9]:
def _parse_year_count(text):
    """Convert a numeric token's text to an int, or return None if it cannot be."""
    try:
        return int(text)
    except ValueError:
        pass
    try:
        # Accept decimal spellings such as "2.5" by truncating toward zero.
        return int(float(text))
    except (ValueError, OverflowError):
        # Spelled-out numbers ("five"), "nan", "inf", etc. are not usable.
        return None


def extract_years_of_experience(resume):
    """Return the number of years mentioned in the resume's DATE entities.

    Scans ``resume.ents`` for entities labelled ``"DATE"`` whose text contains
    "year" (case-insensitive) and reads every token in them tagged ``"NUM"``.
    The last token that parses as a number wins; tokens that cannot be parsed
    (e.g. "five") are skipped instead of raising ``ValueError``.

    Args:
        resume: Object exposing ``ents``; each entity has ``label_``, ``text``
            and iterates over tokens with ``pos_`` and ``text``.

    Returns:
        The parsed integer, or 0 when no usable number is found.
    """
    years_exp = 0
    for ent in resume.ents:
        if ent.label_ != "DATE" or "year" not in ent.text.lower():
            continue
        for token in ent:
            if token.pos_ != "NUM":
                continue
            parsed = _parse_year_count(token.text)
            if parsed is not None:
                years_exp = parsed
    return years_exp

In [10]:
def extract_location(resume):
    """Return the text of the first ``"GPE"`` entity in ``resume.ents``.

    Falls back to an empty string when the resume has no such entity.
    """
    gpe_texts = (entity.text for entity in resume.ents if entity.label_ == "GPE")
    return next(gpe_texts, "")

In [11]:
def find_top_candidates(resumes, job_description, years_of_experience, location_preference, top_n=20):
    """Parse and score every resume, returning the best-scoring ones.

    Args:
        resumes: Iterable of resume file paths, each passed to ``parse_resume``.
        job_description: Parsed job description forwarded to
            ``evaluate_candidate``.
        years_of_experience: Minimum years required, forwarded as-is.
        location_preference: Preferred location, forwarded as-is.
        top_n: Maximum number of candidates to return (default 20).

    Returns:
        A list of ``(candidate_name, (resume_path, score))`` tuples, highest
        score first, containing at most ``top_n`` entries.
    """
    # Collect results in a list rather than a dict keyed by name: extracted
    # names can collide (or be empty), and a dict would silently keep only the
    # last resume for each such name.
    scored = []
    for resume_path in resumes:
        resume, candidate_name = parse_resume(resume_path)
        score = evaluate_candidate(resume, job_description, years_of_experience, location_preference)
        scored.append((candidate_name, (resume_path, score)))
    top_candidates = nlargest(top_n, scored, key=lambda entry: entry[1][1])
    return top_candidates

In [12]:
# Suppress all Python warnings from this point on so the results cell prints
# only the ranking.
# NOTE(review): this blanket "ignore" filter also hides any warnings raised by
# the libraries used in this notebook (possibly including ones about the
# similarity computation) — consider narrowing it to specific categories.
import warnings
warnings.filterwarnings("ignore")

In [13]:
# Collect the resume PDF paths interactively, one prompt per resume.
num_resumes = int(input("Enter the number of resumes: "))
resumes = []
for i in range(num_resumes):
    resumes.append(input(f"Enter path to resume {i+1} (PDF): "))

# Gather the job description and the hard requirements used for the bonus.
job_description = get_job_description()
years_of_experience = int(input("Enter years of experience required: "))
location_preference = input("Enter location preference: ")

# Score every resume and print the ranking, best first.
top_candidates = find_top_candidates(
    resumes,
    job_description,
    years_of_experience,
    location_preference,
)
print("Top candidates:")
for rank, candidate in enumerate(top_candidates, start=1):
    name, (resume_path, score) = candidate
    print(f"Rank: {rank}, Name: {name}, Resume Path: {resume_path}, Score: {score}")

Enter the number of resumes:  2
Enter path to resume 1 (PDF):  /kaggle/input/resumes2/1resume-sep.pdf
Enter path to resume 2 (PDF):  /kaggle/input/resumes2/PNeelima_resume.pdf
Enter job description:  Enthusiastic recent graduate with a degree in Computer Science and a passion for software development.  Proficient in Java and Python, with a strong foundation in algorithm design.  Eager to contribute to a dynamic team and apply problem-solving skills in a collaborative environment
Enter years of experience required:  0
Enter location preference:  remote


Top candidates:
Rank: 1, Name: Contact Enthusiastic, Resume Path: /kaggle/input/resumes2/PNeelima_resume.pdf, Score: 0.5255526106402673
Rank: 2, Name: Neelima Pentapalli, Resume Path: /kaggle/input/resumes2/1resume-sep.pdf, Score: 0.45461675275305063
