In [1]:
import os
import pdfplumber
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
with open("job_description.txt", "r", encoding="utf-8") as file:
    job_description = file.read()

In [3]:
def extract_text_from_pdf(pdf_path):
    text = ''
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text
    return text

In [4]:
resume_folder = "resumes"
resume_texts = []
resume_names = []

for filename in os.listdir(resume_folder):
    if filename.endswith(".pdf"):
        path = os.path.join(resume_folder, filename)
        text = extract_text_from_pdf(path)
        resume_texts.append(text)
        resume_names.append(filename)

In [5]:
documents = [job_description] + resume_texts
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(documents)

In [6]:
similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

In [7]:
results = pd.DataFrame({
    "Resume Name": resume_names,
    "Match %": [round(score * 100, 2) for score in similarities]
})

In [8]:
results = results.sort_values(by="Match %", ascending=False)

In [9]:
print("\n Resume Matching Results:\n")
print(results)


 Resume Matching Results:

                                   Resume Name  Match %
4                              Resume sonu.pdf    31.71
3                     Durga prasad  Resume.pdf    29.80
9                        vishakhaResume_ml.pdf    28.62
0  anurag-sharma-business-analytics-resume.pdf    26.54
1                                   ASNA.K.pdf    25.25
6                  ResumeAareen IIT Bombay.pdf    25.04
7                           Sachin CV 2.o .pdf    24.73
5                          Resume1 (1) (1).pdf    21.39
8                        Sonu Kumar Resume.pdf    13.74
2                      Bhumi Ghutke resume.pdf     0.00


In [10]:
results.to_csv("resume_match_results.csv", index=False)
print("\n Results saved as 'resume_match_results.csv'")


 Results saved as 'resume_match_results.csv'


In [14]:
results

Unnamed: 0,Resume Name,Match %
4,Resume sonu.pdf,31.71
3,Durga prasad Resume.pdf,29.8
9,vishakhaResume_ml.pdf,28.62
0,anurag-sharma-business-analytics-resume.pdf,26.54
1,ASNA.K.pdf,25.25
6,ResumeAareen IIT Bombay.pdf,25.04
7,Sachin CV 2.o .pdf,24.73
5,Resume1 (1) (1).pdf,21.39
8,Sonu Kumar Resume.pdf,13.74
2,Bhumi Ghutke resume.pdf,0.0
