In [4]:
# Required Libraries
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import display
import ipywidgets as widgets

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Load Dataset
df = pd.read_csv('C:/Users/sudha/job decr/jb_df.csv')  # <-- update path if needed
df['Job Description'] = df['Job Description'].fillna('')

# Preprocessing Function
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    tokens = word_tokenize(text)
    cleaned = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]
    return " ".join(cleaned)

# Clean Descriptions
df['clean'] = df['Job Description'].apply(preprocess)

# TF-IDF Vectorization
vectorizer = TfidfVectorizer()
job_vectors = vectorizer.fit_transform(df['clean'])

# File Upload Widget


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sudha\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sudha\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sudha\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
upload_widget = widgets.FileUpload(accept='.txt', multiple=False)
display(upload_widget)

resume_text = ""

def read_uploaded_file(change):
    global resume_text
    for filename, fileinfo in upload_widget.value.items():
        resume_text = fileinfo['content'].decode('utf-8', errors='ignore')
        print(f"File Name: {filename}\n")
        print("Resume Content:\n")
        print(resume_text)

upload_widget.observe(read_uploaded_file, names='value')


FileUpload(value={}, accept='.txt', description='Upload')

📄 File Name: sample_resume.txt

📑 Resume Content:

I am experienced in Python programming, data analysis, and have worked on machine learning models using scikit-learn and TensorFlow. I also have exposure to cloud technologies like AWS.


In [7]:
# Resume Matching
if resume_text.strip():
    resume_clean = preprocess(resume_text)
    resume_vec = vectorizer.transform([resume_clean])
    
    sim_scores = cosine_similarity(resume_vec, job_vectors).flatten()
    df['similarity'] = sim_scores

    top_matches = df.sort_values(by='similarity', ascending=False).head(5)

    print("\nTop 5 Job Matches:\n")
    for _, row in top_matches.iterrows():
        print(f" Job Title: {row['Job Title']}")
        print(f" Match Score: {row['similarity']:.2f}")
        print(f" Description: {row['Job Description'][:300]}...\n")
else:
    print(" No resume uploaded or empty file.")



Top 5 Job Matches:

 Job Title: Data Scientist
 Match Score: 0.46
 Description: Machine Learning Engineers develop machine learning models and algorithms, working on tasks like data preprocessing, model training, and deployment....

 Job Title: Data Scientist
 Match Score: 0.46
 Description: Machine Learning Engineers develop machine learning models and algorithms, working on tasks like data preprocessing, model training, and deployment....

 Job Title: Data Scientist
 Match Score: 0.46
 Description: Machine Learning Engineers develop machine learning models and algorithms, working on tasks like data preprocessing, model training, and deployment....

 Job Title: Data Scientist
 Match Score: 0.46
 Description: Machine Learning Engineers develop machine learning models and algorithms, working on tasks like data preprocessing, model training, and deployment....

 Job Title: Data Scientist
 Match Score: 0.46
 Description: Machine Learning Engineers develop machine learning models and algo