In [4]:
# Install required packages
# Remove any installed package named `fitz` first (-y skips the confirmation
# prompt) — presumably because an unrelated PyPI package of that name would
# clash with PyMuPDF's own `fitz` module; TODO confirm.
!pip uninstall -y fitz -q
# pymupdf opens the PDFs, pandas builds the results table, and scikit-learn
# provides the TF-IDF vectorizer and cosine similarity; -q keeps pip quiet.
!pip install pymupdf pandas scikit-learn -q

# Import correct libraries
import pymupdf
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os

# Fix pymupdf import
# PyMuPDF is imported above under the name `pymupdf`; bind the same module to
# the name `fitz` as well so that the code below can call fitz.open().
fitz = pymupdf  # to use fitz.open()

# Resumes to rank, as (display name, path on disk) pairs. The files must be
# uploaded beforehand (left side panel) so that they exist under /content.
resume_files = dict(
    (
        ("MD_RIZWAN_RESUME.pdf", "/content/MD_RIZWAN_RESUME.pdf"),
        ("sabiha_resume.pdf", "/content/sabiha_resume.pdf"),
    )
)

# Job description that every resume is scored against.
job_description = """
We are hiring a Python Developer skilled in Pandas, NumPy, Scikit-learn,
machine learning, and deployment tools like Flask or Streamlit.
"""

# Extract text from PDF
def extract_text_from_pdf(pdf_path) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The ``get_text()`` output of each page joined with no separator.
        A document without pages yields an empty string.
    """
    # The context manager closes the document (and its underlying file) once
    # extraction is done, including when a page raises part-way through.
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)

# Process resumes
# Parallel lists: resume_texts[i] holds the text extracted from the file
# whose display name is resume_names[i].
resume_texts, resume_names = [], []

for name, path in resume_files.items():
    # Report and skip any resume that is not present on disk.
    if not os.path.exists(path):
        print(f"❌ File not found: {name}")
        continue

    # Extract before appending so a failed extraction leaves both lists intact.
    text = extract_text_from_pdf(path)
    resume_texts.append(text)
    resume_names.append(name)

# TF-IDF Matching
# With no resume loaded, the slice tfidf_matrix[1:] below would have zero rows
# and cosine_similarity() would reject it with a validation error that says
# nothing about the real cause, so fail early with an actionable message.
if not resume_texts:
    raise ValueError(
        "No resumes were loaded; upload the PDF files listed in resume_files "
        "before running the ranking."
    )

# Row 0 is the job description; the remaining rows are the resumes, in the
# same order as resume_names.
documents = [job_description] + resume_texts
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(documents)
# Compare the job-description row with every resume row: one score per resume.
scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

# Show result & save
# One row per loaded resume, with its similarity score expressed as a
# percentage rounded to two decimals.
df = pd.DataFrame({'Resume': resume_names, 'Match (%)': (scores * 100).round(2)})
# Best match first.
df = df.sort_values(by='Match (%)', ascending=False)

# Write the ranking to disk (without the index column), then display it.
df.to_csv("ranked_output.csv", index=False)
print("✅ Resume Ranking Complete!")
print(df)


✅ Resume Ranking Complete!
                 Resume  Match (%)
0  MD_RIZWAN_RESUME.pdf      10.69
1     sabiha_resume.pdf       2.91
