<a href="https://colab.research.google.com/github/rahmamohax/Elevvo-Tasks/blob/master/Resume%20Screening%20Using%20NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task 8: Resume Screening Using NLP

In [20]:
import kagglehub
import os
import pandas as pd

resume_path = kagglehub.dataset_download("snehaanbhawal/resume-dataset")
print("Path to dataset files:", resume_path)

resumes_df = pd.read_csv(os.path.join(resume_path,"Resume/Resume.csv"))
resumes_df.head()

Path to dataset files: /kaggle/input/resume-dataset


Unnamed: 0,ID,Resume_str,Resume_html,Category
0,16852973,HR ADMINISTRATOR/MARKETING ASSOCIATE\...,"<div class=""fontsize fontface vmargins hmargin...",HR
1,22323967,"HR SPECIALIST, US HR OPERATIONS ...","<div class=""fontsize fontface vmargins hmargin...",HR
2,33176873,HR DIRECTOR Summary Over 2...,"<div class=""fontsize fontface vmargins hmargin...",HR
3,27018550,HR SPECIALIST Summary Dedica...,"<div class=""fontsize fontface vmargins hmargin...",HR
4,17812897,HR MANAGER Skill Highlights ...,"<div class=""fontsize fontface vmargins hmargin...",HR


In [21]:
job_path = kagglehub.dataset_download("ravindrasinghrana/job-description-dataset")
print("Path to dataset files:", job_path )

jobs_df = pd.read_csv(os.path.join(job_path,"job_descriptions.csv"))
jobs_df.head()

Path to dataset files: /kaggle/input/job-description-dataset


Unnamed: 0,Job Id,Experience,Qualifications,Salary Range,location,Country,latitude,longitude,Work Type,Company Size,...,Contact,Job Title,Role,Job Portal,Job Description,Benefits,skills,Responsibilities,Company,Company Profile
0,1089843540111562,5 to 15 Years,M.Tech,$59K-$99K,Douglas,Isle of Man,54.2361,-4.5481,Intern,26801,...,001-381-930-7517x737,Digital Marketing Specialist,Social Media Manager,Snagajob,Social Media Managers oversee an organizations...,"{'Flexible Spending Accounts (FSAs), Relocatio...","Social media platforms (e.g., Facebook, Twitte...","Manage and grow social media accounts, create ...",Icahn Enterprises,"{""Sector"":""Diversified"",""Industry"":""Diversifie..."
1,398454096642776,2 to 12 Years,BCA,$56K-$116K,Ashgabat,Turkmenistan,38.9697,59.5563,Intern,100340,...,461-509-4216,Web Developer,Frontend Web Developer,Idealist,Frontend Web Developers design and implement u...,"{'Health Insurance, Retirement Plans, Paid Tim...","HTML, CSS, JavaScript Frontend frameworks (e.g...","Design and code user interfaces for websites, ...",PNC Financial Services Group,"{""Sector"":""Financial Services"",""Industry"":""Com..."
2,481640072963533,0 to 12 Years,PhD,$61K-$104K,Macao,"Macao SAR, China",22.1987,113.5439,Temporary,84525,...,9687619505,Operations Manager,Quality Control Manager,Jobs2Careers,Quality Control Managers establish and enforce...,"{'Legal Assistance, Bonuses and Incentive Prog...",Quality control processes and methodologies St...,Establish and enforce quality control standard...,United Services Automobile Assn.,"{""Sector"":""Insurance"",""Industry"":""Insurance: P..."
3,688192671473044,4 to 11 Years,PhD,$65K-$91K,Porto-Novo,Benin,9.3077,2.3158,Full-Time,129896,...,+1-820-643-5431x47576,Network Engineer,Wireless Network Engineer,FlexJobs,"Wireless Network Engineers design, implement, ...","{'Transportation Benefits, Professional Develo...",Wireless network design and architecture Wi-Fi...,"Design, configure, and optimize wireless netwo...",Hess,"{""Sector"":""Energy"",""Industry"":""Mining, Crude-O..."
4,117057806156508,1 to 12 Years,MBA,$64K-$87K,Santiago,Chile,-35.6751,-71.5429,Intern,53944,...,343.975.4702x9340,Event Manager,Conference Manager,Jobs2Careers,A Conference Manager coordinates and manages c...,"{'Flexible Spending Accounts (FSAs), Relocatio...",Event planning Conference logistics Budget man...,Specialize in conference and convention planni...,Cairn Energy,"{""Sector"":""Energy"",""Industry"":""Energy - Oil & ..."


In [22]:
resumes_df = resumes_df.rename(columns={'Resume_str': 'resume_text'})
jobs_df = jobs_df.rename(columns={'Job Description': 'job_text'})

In [23]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

model = SentenceTransformer('all-MiniLM-L6-v2',device="cuda")

resume_embeddings = model.encode(resumes_df['resume_text'].tolist(), convert_to_tensor=True)
# job_embeddings = model.encode(job_df['job_text'].tolist(), convert_to_tensor=True)

In [24]:
job_embeddings = model.encode(
    jobs_df['job_text'].tolist(),
    batch_size=128,
    convert_to_tensor=True
)

In [25]:
import torch

def match_resumes(job_index, top_n=5):
    job_vec = job_embeddings[job_index].unsqueeze(0)
    scores = cosine_similarity(job_vec.cpu(), resume_embeddings.cpu())[0]

    top_indices = scores.argsort()[::-1][:top_n]
    results = []
    for idx in top_indices:
        results.append({
            "Resume ID": idx,
            "Score": round(scores[idx]*100, 2),
            "Resume Text": resumes_df.iloc[idx]['resume_text'][:200] + "..."
        })
    return pd.DataFrame(results)

# Match resumes for the first job posting
match_resumes(0, top_n=5)


Unnamed: 0,Resume ID,Score,Resume Text
0,2101,69.760002,PUBLIC RELATIONS/SOCIAL MEDIA MANAGEM...
1,1619,65.709999,PR & EVENT MANAGER Summary E...
2,2127,65.209999,SOCIAL MEDIA & COMMUNICATIONS MANAGER...
3,1267,64.919998,SOCIAL MEDIA MARKETING MANAGER ...
4,1283,63.130001,DIRECTOR OF SOCIAL MEDIA MARKETING ...


Extract Named Entities (Skills, Experience)

In [26]:
import spacy
nlp = spacy.load("en_core_web_sm")

def extract_entities(text):
    doc = nlp(text)
    skills = [ent.text for ent in doc.ents if ent.label_ in ["ORG", "PRODUCT", "WORK_OF_ART"]]
    return skills

resumes_df['skills'] = resumes_df['resume_text'].apply(extract_entities)


In [27]:
print(resumes_df['skills'])

0       [Team, DOT, IHG, Customer Loyalty and Marketin...
1       [Communications, Marketing, Human Resources an...
2       [Human Resources Executive Management, HRIS, H...
3       [10-Key, Microsoft, Excel, Access, Outlook, Po...
4       [Mediation & Advocacy  HR Policies & Procedure...
                              ...                        
2479    [SGT/E-5, KEY, Current Secret  Clearance, Cont...
2480    [COMMUNICATIONS, Current     Government Relati...
2481    [Qualifications        Windows / Mac /, Malwar...
2482    [the Executive & Advisory Board of Directors, ...
2483    [Pioneer Packaging, Inventory Control and Flee...
Name: skills, Length: 2484, dtype: object
