In [3]:
import os
import docx2txt
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path``, concatenated.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        A single string made of each page's extracted text in page order,
        with nothing inserted between pages. Pages for which the extractor
        yields no text contribute an empty string.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Defensive `or ""`: a falsy result (None or "") from a page must not
        # break the concatenation with a TypeError.
        # A single join also avoids rebuilding the string once per page.
        return "".join(page.extract_text() or "" for page in reader.pages)

def extract_text_from_docx(file_path):
    """Return whatever ``docx2txt.process`` extracts from the file at ``file_path``."""
    document_text = docx2txt.process(file_path)
    return document_text

def extract_text_from_txt(file_path):
    """Read the file at ``file_path`` as UTF-8 text and return its full contents."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        contents = source.read()
    return contents

def extract_text(file_path):
    """Extract text from a resume file, choosing the reader by file extension.

    The extension is matched case-insensitively, so ``CV.PDF`` is handled the
    same way as ``cv.pdf``.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text for ``.pdf``, ``.docx`` and ``.txt`` files, or an
        empty string for any other extension.
    """
    # Match on a lower-cased copy; the original path is still what gets opened.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered.endswith('.docx'):
        return extract_text_from_docx(file_path)
    if lowered.endswith('.txt'):
        return extract_text_from_txt(file_path)
    return ""

def main(top_n=3):
    """Interactively score resumes against a job description.

    Prompts for a job description and a comma-separated list of resume file
    paths, extracts the text of each resume, builds TF-IDF vectors for the
    job description and the resumes, and prints the cosine similarity of each
    resume to the job description followed by the best-scoring resumes.

    The function prints a message and returns early when the job description
    is blank, when no resume path is given, when any path does not exist, or
    when the texts cannot be vectorized.

    Args:
        top_n: How many of the highest-scoring resumes to list under
            "Top Matching Resumes". Defaults to 3.
    """
    # Input job description
    job_description = input("Enter the job description: ")
    if not job_description.strip():
        print("Job description cannot be empty!")
        return

    # Input resume files. Blank entries are dropped: "".split(',') yields
    # [''], which would otherwise be reported as a missing file named ''
    # and make the "at least one resume" check unreachable.
    raw_paths = input("Enter the paths to the resume files, separated by commas: ")
    resume_files = [path.strip() for path in raw_paths.split(',') if path.strip()]
    if not resume_files:
        print("Please provide at least one resume.")
        return

    resumes = []
    for file_path in resume_files:
        if not os.path.exists(file_path):
            print(f"Resume file not found: {file_path}")
            return
        resume_text = extract_text(file_path) or ""
        if not resume_text.strip():
            # The resume is still scored, but the reader is told why it
            # cannot match anything.
            print(f"Warning: no text could be extracted from {file_path}")
        resumes.append(resume_text)

    # Vectorize job description (row 0) and resumes (rows 1..n)
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([job_description] + resumes)
    except ValueError as error:
        # Report a vectorizer failure instead of letting it surface as a traceback.
        print(f"Could not vectorize the inputs: {error}")
        return

    # Calculate cosine similarities directly on the sparse matrix; slicing
    # with 0:1 keeps the job description as a 2-D single-row matrix.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    # Display similarity scores
    print("\nSimilarity Scores:")
    for file_path, score in zip(resume_files, similarities):
        print(f"{file_path}: {round(score, 2)}")

    # Display top matches, best first
    top_indices = similarities.argsort()[::-1][:top_n]
    print("\nTop Matching Resumes:")
    for i in top_indices:
        print(f"{resume_files[i]} (Score: {round(similarities[i], 2)})")


# Start the interactive prompt only when this code is executed directly
# (as a script or as a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Resume file not found: Resume-Screening-with-Machine-Learning-Job-Recommendations-Parsing-Categorization-main/Teacher.pdf


In [None]:
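# Example resume paths to paste (comma-separated) at the
# "Enter the paths to the resume files" prompt:
#Resume-Screening-with-Machine-Learning-Job-Recommendations-Parsing-Categorization-main/designer.pdf

#Resume-Screening-with-Machine-Learning-Job-Recommendations-Parsing-Categorization-main/Teacher.pdf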