In [None]:
import re
import string
from pdfminer.high_level import extract_text
from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS
from sklearn.metrics.pairwise import cosine_similarity

#Clean text function
def clean_text(text):
    """Return *text* lower-cased with all ASCII punctuation removed.

    Args:
        text: The raw string to normalise.

    Returns:
        The lower-cased string with every character listed in
        ``string.punctuation`` deleted.

    Note:
        Punctuation is deleted, not replaced by a space, so tokens joined
        only by punctuation are merged (``"CI/CD"`` becomes ``"cicd"``).
    """
    # str.translate treats every punctuation character literally. Building a
    # regex character class from the raw string instead lets "\]" be read as
    # an escaped "]", which silently leaves backslashes in the text.
    return text.lower().translate(str.maketrans("", "", string.punctuation))

#Analyze against multiple roles
def find_best_matching_role(resume_pdf_path, roles_dict):
    """Print how closely a résumé PDF matches each role description.

    The résumé text is extracted from the PDF, cleaned with ``clean_text``
    and compared with every cleaned description in *roles_dict* using the
    cosine similarity of raw term counts. The best match and the full
    ranking (highest score first) are printed.

    Args:
        resume_pdf_path: Path of the résumé PDF, passed to ``extract_text``.
        roles_dict: Mapping of role name to job-description text.

    Returns:
        None. Results and any problems are reported with ``print``.
    """
    try:
        resume_text = extract_text(resume_pdf_path)
    except Exception as e:
        # Best-effort command-line behaviour: report the problem and stop
        # instead of surfacing a traceback to the user.
        print(f"⚠ Error reading PDF: {e}")
        return

    # Without roles there is nothing to rank; scores[0] below would raise
    # IndexError.
    if not roles_dict:
        print("⚠ No roles were provided to compare against.")
        return

    resume_clean = clean_text(resume_text)

    # If nothing was extracted, every role would score 0%, which is
    # misleading; tell the user what actually happened.
    if not resume_clean.strip():
        print("⚠ No text could be extracted from the PDF.")
        return

    scores = []

    for role, jd_text in roles_dict.items():
        jd_clean = clean_text(jd_text)

        # NOTE(review): ENGLISH_STOP_WORDS is imported by this file but not
        # used here, so common words count toward the score — confirm
        # whether stop-word filtering was intended.
        try:
            # Row 0 holds the job-description counts, row 1 the résumé's.
            term_counts = CountVectorizer().fit_transform([jd_clean, resume_clean])
        except ValueError:
            # CountVectorizer raises ValueError when neither text contains a
            # usable token (empty vocabulary); treat that as no overlap.
            similarity = 0.0
        else:
            # cosine_similarity accepts sparse rows, so no dense copy is made.
            similarity = cosine_similarity(term_counts[0], term_counts[1])[0][0]

        scores.append((role, round(similarity * 100, 2)))

    # Sort roles by match %, best first (ties keep roles_dict order).
    scores.sort(key=lambda x: x[1], reverse=True)

    # Best matching role
    best_role, best_score = scores[0]
    print("\nBest Matching Role Based on Your Resume:")
    print(f"{best_role} — Match Score: {best_score}%")

    print("\nFull Role Match Summary:")
    for role, score in scores:
        print(f"🔹 {role}: {score}%")

# === Define multiple software roles ===

# Maps a role title to a short free-text description of the skills expected
# for that role. find_best_matching_role iterates over these items, cleans
# each description with clean_text and scores it against the résumé text, so
# supporting another role only requires adding a new title/description pair.
roles = {
    "Backend Developer": """
        Experience in Python, Java, REST APIs, SQL, Django/Flask, Git, and Docker.
        Strong understanding of databases, caching, and server-side logic.
    """,
    "Frontend Developer": """
        Proficient in JavaScript, React, HTML, CSS, Figma.
        Experience in building responsive UI, web design principles, and performance optimization.
    """,
    "Machine Learning Engineer": """
        Strong foundation in Python, scikit-learn, pandas, NumPy, machine learning algorithms.
        Experience with model training, deployment, and data preprocessing.
    """,
    "DevOps Engineer": """
        Experience in AWS, Docker, CI/CD, Jenkins, Kubernetes, Git, monitoring, and scripting.
        Understanding of system architecture and deployment pipelines.
    """,
    "Data Analyst": """
        Skilled in SQL, Excel, Python, Power BI/Tableau, data visualization, and reporting.
        Experience with data cleaning, ETL, and business metrics analysis.
    """
}

# === 🖨 Run the program ===
# Only prompt when executed directly (script or notebook cell), so that
# importing this file for its functions does not block on input().
if __name__ == "__main__":
    print("=== Resume Role Matcher (PDF Only) ===")
    # Pasted paths often arrive wrapped in quotes or padded with spaces;
    # strip both so the PDF can actually be located.
    resume_path = input("Enter full path to your resume PDF file: ").strip().strip("\"'").strip()

    find_best_matching_role(resume_path, roles)

=== Resume Role Matcher (PDF Only) ===
