In [3]:
import PyPDF2
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Function to extract text from PDF files
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Pages are joined with a newline so that the last word of one page is
    not fused with the first word of the next page, which would corrupt
    word-level tokenisation of the result.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        str: Text of all pages in page order, separated by newlines. A page
        that yields no text contributes an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Defensive: tolerate a page whose extract_text() yields None
        # instead of a string, rather than failing on concatenation.
        page_texts = [page.extract_text() or '' for page in reader.pages]
    return '\n'.join(page_texts)

# Reference text that every resume is scored against (used as the job description).
# NOTE(review): the content reads like a resume excerpt (objective, education, GPA)
# rather than a job posting — confirm this is the intended input.
job_description = """
OBJECTIVE: Seeking internship in testing
EDUCATION B.Tech, Software Engineering
May 2020
DIT University ,DEHRADUN , GPA: 8.2
A.S., Computer Science Jun 20XX
DeAnza College, Cupertino, CA, GPA: 37.8
TECHNICAL SKILLS
Programming: Java, Git, C, Python,C++,HTML, JavaScript, Swift, MySQL
Frameworks: AngularJS, React
OS: Unix/Linux, iOS
Remote: Zoom, Google Meets, Slack, Discord"""

# Function to clean and preprocess text
def preprocess_text(text):
    """Lower-case `text` and blank out every non-word character.

    Each character matched by the regex class ``\\W`` (anything that is not
    a letter, digit or underscore) is replaced by a single space; runs of
    such characters are not collapsed.

    Args:
        text: The string to normalise.

    Returns:
        str: The normalised string, same length as the input.
    """
    lowered = text.lower()
    return re.sub(r'\W', ' ', lowered)

# Function to calculate cosine similarity between job description and resumes
def calculate_similarity(job_description, resume):
    """Score how closely a resume PDF matches a job description.

    Both texts are normalised with `preprocess_text`, turned into term-count
    vectors over their shared vocabulary, and compared by cosine similarity.

    Args:
        job_description: Job description text.
        resume: Path of the resume PDF; its text is read with
            `extract_text_from_pdf`.

    Returns:
        The cosine similarity between the two count vectors, i.e. entry
        [0][1] of the pairwise similarity matrix.
    """
    # Index 0 is the job description, index 1 is the resume.
    documents = [
        preprocess_text(job_description),
        preprocess_text(extract_text_from_pdf(resume)),
    ]

    term_counts = CountVectorizer().fit_transform(documents).toarray()
    pairwise = cosine_similarity(term_counts)

    return pairwise[0][1]

# Candidate resumes (PDF files) to rank against the job description
resume1 = r"C:\Users\2020r\Desktop\mini pro\full stack development.pdf"
resume2 = r"C:\Users\2020r\Desktop\mini pro\testing.pdf"
resume3 = r"C:\Users\2020r\Desktop\mini pro\Data Analysis.pdf"
resumes = [resume1, resume2, resume3]

# One similarity score per resume, in the same order as `resumes`
similarity_scores = list(
    map(lambda path: calculate_similarity(job_description, path), resumes)
)

# The first resume holding the highest score is the recommendation
best_match_index = max(range(len(similarity_scores)), key=similarity_scores.__getitem__)
recommended_resume = resumes[best_match_index]

# Report the job description and the chosen resume
print("Job Description:\n", job_description)
print("\nRecommended Resume:\n", recommended_resume)

# Print the recommended resume page by page
print("\nFull Content of Recommended Resume:\n")
with open(recommended_resume, 'rb') as file:
    reader = PyPDF2.PdfReader(file)
    for page in reader.pages:
        print(page.extract_text())

print("\nSimilarity Score:", similarity_scores[best_match_index])


Job Description:
 
OBJECTIVE: Seeking internship in testing
EDUCATION B.Tech, Software Engineering
May 2020
DIT University ,DEHRADUN , GPA: 8.2
A.S., Computer Science Jun 20XX
DeAnza College, Cupertino, CA, GPA: 37.8
TECHNICAL SKILLS
Programming: Java, Git, C, Python,C++,HTML, JavaScript, Swift, MySQL
Frameworks: AngularJS, React
OS: Unix/Linux, iOS
Remote: Zoom, Google Meets, Slack, Discord

Recommended Resume:
 C:\Users\2020r\Desktop\mini pro\testing.pdf

Full Content of Recommended Resume:

Resume Sample  (Software Engineering, pre -career)  
                               AKSHA Y SINGHANIA  
DIT University , DEHRADUN 95192 | ( 91)34546727  | akshay @gmail.com | linkedin/in/imaspartan    
 OBJECTIVE: Seeking internship in testing  
EDUCATION B. Tech, Software Engineering                               
May 20 20 
DIT University ,DEHRADUN , GPA: 8.2 
A.S., Computer Science                              Jun 20XX  
DeAnza College, Cupertino, CA, GPA: 3 7.8 
TECHNICAL SKILLS  
Programming

In [1]:
import PyPDF2
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import tkinter as tk
from tkinter import filedialog, Text

# Function to extract text from PDF files
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF as one string.

    Pages are joined with a newline so that the last word of one page is
    not fused with the first word of the next page, which would corrupt
    word-level tokenisation of the result.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        str: Text of all pages in page order, separated by newlines. A page
        that yields no text contributes an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Defensive: tolerate a page whose extract_text() yields None
        # instead of a string, rather than failing on concatenation.
        page_texts = [page.extract_text() or '' for page in reader.pages]
    return '\n'.join(page_texts)

# Function to clean and preprocess text
def preprocess_text(text):
    """Lower-case `text` and blank out every non-word character.

    Each character matched by the regex class ``\\W`` (anything that is not
    a letter, digit or underscore) is replaced by a single space; runs of
    such characters are not collapsed.

    Args:
        text: The string to normalise.

    Returns:
        str: The normalised string, same length as the input.
    """
    lowered = text.lower()
    return re.sub(r'\W', ' ', lowered)

# Function to calculate cosine similarity between job description and resumes
def calculate_similarity(job_description, resume):
    """Score how closely a resume PDF matches a job description.

    Both texts are normalised with `preprocess_text`, turned into term-count
    vectors over their shared vocabulary, and compared by cosine similarity.

    Args:
        job_description: Job description text.
        resume: Path of the resume PDF; its text is read with
            `extract_text_from_pdf`.

    Returns:
        The cosine similarity between the two count vectors, i.e. entry
        [0][1] of the pairwise similarity matrix.
    """
    # Index 0 is the job description, index 1 is the resume.
    documents = [
        preprocess_text(job_description),
        preprocess_text(extract_text_from_pdf(resume)),
    ]

    term_counts = CountVectorizer().fit_transform(documents).toarray()
    pairwise = cosine_similarity(term_counts)

    return pairwise[0][1]

# Function to handle the resume selection
def select_resume():
    """Ask the user for a resume file and append its path to the listbox.

    Opens a file-selection dialog (PDF files offered first). If the user
    dismisses the dialog without choosing a file, nothing is added, so the
    listbox never holds an empty entry that would later fail to open.
    """
    resume_path = filedialog.askopenfilename(initialdir="/", title="Select Resume",
                                             filetypes=(("PDF files", "*.pdf"), ("all files", "*.*")))
    # A cancelled dialog yields an empty value; skip it instead of listing it.
    if not resume_path:
        return
    resume_listbox.insert(tk.END, resume_path)

# Function to find and display the best-matching resume
def find_best_match():
    """Score every listed resume against the job description and show the best.

    Reads the job description from the text widget and the resume paths from
    the listbox, scores each resume with `calculate_similarity`, and writes
    the outcome to `result_label`:

    * a prompt when the job description is blank or no resume is listed,
    * an error message naming the resume that could not be processed,
    * otherwise the first resume with the highest score, and that score.
    """
    job_description = job_desc_entry.get("1.0", 'end-1c')  # Get job description from text widget
    resumes = list(resume_listbox.get(0, tk.END))  # Get the list of selected resumes

    # A description made only of whitespace carries no terms to match on.
    if not job_description.strip() or not resumes:
        result_label.config(text="Please provide job description and select at least one resume.")
        return

    similarity_scores = []
    for resume in resumes:
        try:
            similarity_scores.append(calculate_similarity(job_description, resume))
        except Exception as exc:
            # This runs as a button callback: an exception raised here would
            # not reach the window, so report it in the result label instead.
            result_label.config(text=f"Could not process resume: {resume}\n{exc}")
            return

    best_score = max(similarity_scores)
    recommended_resume = resumes[similarity_scores.index(best_score)]

    result_label.config(text=f"Recommended Resume: {recommended_resume}\nSimilarity Score: {best_score}")

# Main application window
root = tk.Tk()
root.title("Resume Matcher")

# Widgets, created in the order they appear in the window
job_desc_label = tk.Label(root, text="Job Description:")
job_desc_entry = tk.Text(root, height=5, width=50)
resume_label = tk.Label(root, text="Select Resumes:")
resume_listbox = tk.Listbox(root, selectmode=tk.MULTIPLE, height=5, width=50)
select_resume_button = tk.Button(root, text="Select Resume", command=select_resume)
match_button = tk.Button(root, text="Find Best Match", command=find_best_match)
result_label = tk.Label(root, text="")

# Layout: pack each widget in the same order it was created
job_desc_label.pack()
job_desc_entry.pack()
resume_label.pack()
resume_listbox.pack()
select_resume_button.pack()
match_button.pack()
result_label.pack()

# Hand control to Tk; this call blocks until the window is closed
root.mainloop()
