In [1]:
!pip install nltk scikit-learn



In [2]:
import re

import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords

def clean_text(text, stop_words=None):
    """Normalize raw text into a space-separated string of kept tokens.

    Every character that is not an ASCII letter, a digit, or whitespace is
    deleted (not replaced by a space), the result is lowercased and split on
    whitespace, and tokens found in the stopword collection are dropped.

    Punctuation is removed *before* stopword filtering, so a contraction such
    as "don't" is compared against the stopwords as "dont".

    Args:
        text: Raw input string.
        stop_words: Optional iterable of lowercase words to discard. When
            omitted, NLTK's English stopword list is used.

    Returns:
        The remaining tokens joined by single spaces; an empty string if no
        token survives.
    """
    if stop_words is None:
        # Fetch the list once per call rather than once per token.
        stop_words = stopwords.words('english')
    # A set gives constant-time membership tests inside the filter below.
    stop_set = set(stop_words)

    stripped = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return ' '.join(w for w in stripped.lower().split() if w not in stop_set)

def extract_features(resume_texts, job_desc_text):
    """Vectorize resumes and a job description with one shared TF-IDF fit.

    The job description is appended to the resumes so that a single
    ``TfidfVectorizer`` is fitted on all documents together, which puts every
    resulting vector in the same vocabulary space.

    Args:
        resume_texts: Iterable of resume strings (list, tuple, generator,
            pandas Series, ...). It is materialized into a list internally.
        job_desc_text: The job description string.

    Returns:
        A ``(resume_vecs, job_vec)`` pair sliced from the fitted matrix:
        ``resume_vecs`` holds all rows except the last, in the order the
        resumes were given, and ``job_vec`` is the last row.
    """
    # list() accepts any iterable; a bare `+` would fail or behave
    # element-wise for tuples, generators, arrays and Series.
    corpus = list(resume_texts) + [job_desc_text]
    vectors = TfidfVectorizer().fit_transform(corpus)
    # The job description was appended last, so it is the final row.
    return vectors[:-1], vectors[-1]

def rank_resumes(resume_vecs, job_vec):
    """Compute the cosine similarity of each resume vector to the job vector.

    Despite the name, no sorting happens here: the result of
    ``cosine_similarity(resume_vecs, job_vec)`` is returned unchanged, and
    ordering the resumes by score is left to the caller.

    Args:
        resume_vecs: Resume feature vectors, one row per resume.
        job_vec: Feature vector of the job description.

    Returns:
        The similarity scores as produced by ``cosine_similarity``.
    """
    return cosine_similarity(resume_vecs, job_vec)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [3]:
# List of sample resume texts (paste your real resumes here)
resumes = [
    "Experienced data scientist skilled in Python, machine learning, and statistics.",
    "Software engineer with expertise in C++, Java, and system design.",
    "Python developer with experience in machine learning, deep learning, and data analysis."
]

# Sample job description (paste your real job description here)
job_description = """
We are looking for a Python developer with experience in machine learning and data analysis.
Familiarity with deep learning and statistics is a plus.
"""

# Preprocess: resumes and job description go through the same normalization
resumes_clean = [clean_text(r) for r in resumes]
job_desc_clean = clean_text(job_description)

# Feature Extraction
resume_vecs, job_vec = extract_features(resumes_clean, job_desc_clean)

# Score resumes
scores = rank_resumes(resume_vecs, job_vec)

# Sanity check: one score row per resume, one column for the single job description
assert scores.shape == (len(resumes), 1), f"unexpected score shape {scores.shape}"

# Show ranking: argsort is ascending, so reverse it to list the best match first
ranking = np.argsort(scores[:,0])[::-1]
for idx in ranking:
    print(f"Resume {idx+1}: Score = {scores[idx][0]:.2f}\nText: {resumes[idx]}\n")

Resume 3: Score = 0.77
Text: Python developer with experience in machine learning, deep learning, and data analysis.

Resume 1: Score = 0.39
Text: Experienced data scientist skilled in Python, machine learning, and statistics.

Resume 2: Score = 0.00
Text: Software engineer with expertise in C++, Java, and system design.

