In [2]:
!pip install pymupdf


Collecting pymupdf
  Downloading pymupdf-1.26.3-cp39-abi3-win_amd64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.3-cp39-abi3-win_amd64.whl (18.7 MB)
   ---------------------------------------- 0.0/18.7 MB ? eta -:--:--
   -- ------------------------------------- 1.0/18.7 MB 12.7 MB/s eta 0:00:02
   --- ------------------------------------ 1.8/18.7 MB 6.3 MB/s eta 0:00:03
   ------ --------------------------------- 2.9/18.7 MB 5.2 MB/s eta 0:00:04
   -------- ------------------------------- 4.2/18.7 MB 5.4 MB/s eta 0:00:03
   ----------- ---------------------------- 5.2/18.7 MB 5.6 MB/s eta 0:00:03
   ------------- -------------------------- 6.3/18.7 MB 5.4 MB/s eta 0:00:03
   --------------- ------------------------ 7.3/18.7 MB 5.3 MB/s eta 0:00:03
   ----------------- ---------------------- 8.1/18.7 MB 5.1 MB/s eta 0:00:03
   ------------------- -------------------- 9.2/18.7 MB 5.0 MB/s eta 0:00:02
   --------------------- ------------------ 10.0/18.7 MB 5.0 MB/s eta 0:00:02
   ----

In [8]:
import torch
from transformers import BertTokenizerFast, BertForSequenceClassification
import fitz  # PyMuPDF
import spacy

# 📦 Load model and tokenizer
model_dir = r"C:\Users\sagni\Downloads\Resume Selector\resume_model"

# Pick the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = BertTokenizerFast.from_pretrained(model_dir)

# .to() and .eval() each return the module itself, so the chain leaves `model`
# on `device` and switched to evaluation mode.
model = BertForSequenceClassification.from_pretrained(model_dir).to(device).eval()

# 🔥 Define your label mapping (from your training code)
# Category names listed in class-index order; enumerate() assigns ids 0..24.
id2label = dict(enumerate([
    'Data Science',
    'HR',
    'Advocate',
    'Arts',
    'Web Designing',
    'Mechanical Engineer',
    'Sales',
    'Health and fitness',
    'Civil Engineer',
    'Java Developer',
    'Business Analyst',
    'SAP Developer',
    'Automation Testing',
    'Electrical Engineering',
    'Operations Manager',
    'Python Developer',
    'DevOps Engineer',
    'Network Security Engineer',
    'PMO',
    'Database',
    'Hadoop',
    'ETL Developer',
    'DotNet Developer',
    'Blockchain',
    'Testing',
]))

# 🧹 Preprocess function
# Load the spaCy 'en_core_web_sm' pipeline once at module level; preprocess()
# calls this shared `nlp` object on every invocation.
nlp = spacy.load('en_core_web_sm')
def preprocess(text):
    """Normalize text into a space-separated string of lemmas.

    The input is coerced to ``str``, lower-cased, and run through the
    module-level ``nlp`` pipeline. Tokens flagged as stop words or
    punctuation are dropped; the lemmas of the remaining tokens are joined
    with single spaces.
    """
    lowered = str(text).lower()
    kept_lemmas = []
    for tok in nlp(lowered):
        # Skip stop words and punctuation.
        if tok.is_stop or tok.is_punct:
            continue
        kept_lemmas.append(tok.lemma_)
    return ' '.join(kept_lemmas)

# 📄 Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as one stripped string.

    Args:
        pdf_path: Path handed to ``fitz.open``.

    Returns:
        str: The ``get_text()`` output of each page, concatenated in page
        order with leading and trailing whitespace removed. Empty when the
        pages yield no text.
    """
    # The context manager closes the document (and its file handle) even when
    # extraction of a page raises; the original left the document open.
    with fitz.open(pdf_path) as doc:
        # join() avoids rebuilding the string on every page.
        return "".join(page.get_text() for page in doc).strip()

# 🔮 Predict category & give suggestions
def predict_resume_from_pdf(pdf_path, threshold=0.85, max_length=256, preview_chars=500):
    """Classify a PDF resume and print the predicted category with feedback.

    The PDF text is extracted with ``extract_text_from_pdf``, cleaned with
    ``preprocess``, tokenized with the module-level ``tokenizer`` and scored
    by the module-level ``model``. The top category (looked up in
    ``id2label``), its softmax probability, and a suggestion are printed.

    Args:
        pdf_path: Path of the PDF, forwarded to ``extract_text_from_pdf``.
        threshold: Top-class probability (0-1) below which the
            "below threshold" suggestion is printed.
        max_length: Length the tokenized input is truncated/padded to.
        preview_chars: Number of leading characters of the extracted text to
            echo. ``0`` (or less) skips the preview, which keeps personal
            details from the resume out of the output.

    Returns:
        None. Everything is reported through ``print``; if the PDF yields no
        text, a message is printed and the function returns early.
    """
    text = extract_text_from_pdf(pdf_path)
    if not text:
        print("❌ No text found in the PDF!")
        return

    if preview_chars > 0:
        print(f"📄 Extracted Resume Text (first {preview_chars} chars):\n", text[:preview_chars], "...\n")

    # Preprocess & tokenize
    clean_text = preprocess(text)
    encoding = tokenizer(clean_text, truncation=True, padding='max_length',
                         max_length=max_length, return_tensors='pt')
    # Move every input tensor to the same device as the model.
    encoding = {k: v.to(device) for k, v in encoding.items()}

    # Prediction
    with torch.no_grad():
        logits = model(**encoding).logits
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    # argmax directly on the NumPy array; no need to wrap it back into a tensor.
    pred_idx = int(probs.argmax())
    confidence = probs[pred_idx]
    predicted_category = id2label[pred_idx]

    print(f"🔮 Predicted Category: {predicted_category}")
    print(f"📈 Confidence Score: {confidence * 100:.2f}%")

    if confidence < threshold:
        print("\n⚠️ Suggestion: Confidence is below threshold.")
        print("🔧 Consider adding more role-specific keywords or projects.")
    else:
        print("✅ Resume looks strong for this role!")

# 📥 Example Usage
# NOTE(review): absolute, machine-specific path — point this at your own PDF before running.
pdf_resume_path = r"C:\Users\sagni\Downloads\Sadr_Annan_Resume.pdf"
# Prints a preview of the extracted text, the predicted category and its confidence.
predict_resume_from_pdf(pdf_resume_path)


📄 Extracted Resume Text (first 500 chars):
 ANNAN SADR 
Delhi, India • annansadr.ca@gmail.com • (+91) 75640 38169  
 
Career Summary 
 
Software Engineer with strong expertise in developing, deploying, and optimizing scalable applications, focusing on cloud-
based solutions, microservices architecture, and distributed systems reliability. Skilled in designing and implementing 
RESTful APIs, integrating machine learning algorithms, and managing databases with both SQL and NoSQL platforms. 
Proven track record in implementing AI application ...

🔮 Predicted Category: Java Developer
📈 Confidence Score: 62.93%

⚠️ Suggestion: Confidence is below threshold.
🔧 Consider adding more role-specific keywords or projects.
