In [3]:
import spacy
nlp = spacy.load("en_core_web_sm")

text = "Experienced Python developer with Flask, React, and NLP tools."

doc = nlp(text)
tokens = [token.lemma_ for token in doc if not token.is_stop and token.is_alpha]
print(tokens)


['experience', 'Python', 'developer', 'Flask', 'React', 'NLP', 'tool']


In [16]:
!pip install PyMuPDF




[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [19]:
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text

# Example usage
resume_text = extract_text_from_pdf("MANASVI M RESUME.pdf")
print(resume_text[:1000])  # print first 1000 chars


MANASVI M 
Mail: manasvi.mitblr2023@learner.manipal.edu     Mobile no: 9113060454 
CAREER OBJECTIVE  
Computer Science student at Manipal Academy of Higher Education Bangalore, completed my 
2nd year, seeking opportunities to apply and expand my technical knowledge through 
internships and collaborative projects in software development and research. 
EDUCATION  
Bachelor of Technology (B.Tech) in Computer Science Manipal Academy of Higher Education 
Second Year (2023- Present) 
TECHNICAL SKILLS  
Programming Languages: C, C++ ,Java , Basic Knowledge in Python and AI 
Web Development: React.js ,HTML , CSS  
Databases: MongoDB ,MySQL  
Concepts: Data Structures & Algorithms , Object-Oriented Programming, Database 
Management Systems 
PROJECTS  
Online Thrift  Marketplace 
A web-based platform where users can buy and sell second-hand clothes. 
 
Frontend: HTML, CSS, React.js 
 
Backend: Node.js 
 
Database: MongoDB 
 
Developed a full-stack project where users can buy and sell product

In [20]:
doc = nlp(resume_text)
skills = [token.text for token in doc if token.pos_ in ("NOUN", "PROPN") and not token.is_stop]
print(set(skills))  # Using set to remove duplicates


{'OBJECTIVE', 'B.Tech', 'projects', 'Higher', 'Manipal', 'MongoDB', 'stack', 'college', 'Web', 'Programming', 'Year', 'Communication', 'software', 'Marketplace', 'Present', 'research', 'MySQL', 'PROJECTS', 'person', 'internships', 'views', 'AI', 'C++', 'EVENTS', 'Skills', 'Strong', 'data', 'Databases', 'Education', 'PARTICIPATION', 'users', 'English', 'Online', 'Node.js', 'platform', 'Java', 'Collaboration', 'hands', 'Algorithms', 'Structures', 'CSS', 'decision', 'Development', '2023-', 'C', 'Object', 'Technology', 'project', 'Backend', 'desktop', 'TECHNICAL', 'Hindi', 'Academy', 'React.js', 'Languages', 'solutions', 'manasvi.mitblr2023@learner.manipal.edu', 'MANASVI', 'Bachelor', 'Computer', 'mobile', 'Kannada', 'opportunities', 'Python', 'EDUCATION', 'Science', 'knowledge', 'challenges', 'student', 'development', 'hand', 'web', 'Second', 'Systems', 'Frontend', 'Bangalore', 'Soft', 'products', 'event', 'Concepts', 'Data', 'SKILLS', 'HTML', 'Thrift', 'insights', 'Mail', 'Participated',

In [21]:
for ent in doc.ents:
    print(ent.text, "->", ent.label_)


MANASVI M 
Mail: manasvi.mitblr2023@learner.manipal.edu -> ORG
9113060454 -> CARDINAL
Manipal Academy of Higher Education Bangalore -> ORG
2nd year -> DATE
Bachelor of Technology -> ORG
B.Tech -> ORG
Computer Science Manipal Academy -> ORG
Second Year -> DATE
C++ -> PERSON
Java -> PERSON
AI -> GPE
HTML -> ORG
CSS -> ORG
Data Structures & Algorithms -> ORG
Object-Oriented Programming -> ORG
Database 
Management Systems 
PROJECTS  
Online Thrift  Marketplace -> ORG
second -> ORDINAL
CSS -> ORG
UI -> ORG
English -> LANGUAGE
Kannada -> PERSON
Hindi -> GPE
 Soft Skills -> PERSON


In [22]:
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from heapq import nlargest

def summarize_text(text, n=3):
    doc = nlp(text)
    word_freq = {}
    for word in doc:
        if word.text.lower() not in STOP_WORDS and word.text.lower() not in punctuation:
            word_freq[word.text.lower()] = word_freq.get(word.text.lower(), 0) + 1

    sentence_strength = {}
    for sent in doc.sents:
        for word in sent:
            if word.text.lower() in word_freq:
                sentence_strength[sent] = sentence_strength.get(sent, 0) + word_freq[word.text.lower()]

    summary = nlargest(n, sentence_strength, key=sentence_strength.get)
    return ' '.join([str(s) for s in summary])

print(summarize_text(resume_text, 3))



EDUCATION  
Bachelor of Technology (B.Tech) in Computer Science Manipal Academy of Higher Education 
Second Year (2023- Present) 
TECHNICAL SKILLS  
Programming Languages: C, C++ ,Java , Basic Knowledge in Python and AI 
Web Development: React.js ,HTML , CSS  
Databases: MongoDB ,MySQL  
Concepts: Data Structures & Algorithms , Object-Oriented Programming, Database 
Management Systems 
PROJECTS  
Online Thrift  Marketplace 
A web-based platform where users can buy and sell second-hand clothes. 
 MANASVI M 
Mail: manasvi.mitblr2023@learner.manipal.edu     Mobile no: 9113060454 
CAREER OBJECTIVE  
Computer Science student at Manipal Academy of Higher Education Bangalore, completed my 
2nd year, seeking opportunities to apply and expand my technical knowledge through 
internships and collaborative projects in software development and research. 
 Node.js 
 
Database: MongoDB 
 
Developed a full-stack project where users can buy and sell products. 



In [25]:
job_description = """
Looking for a Python developer with knowledge in Flask, React, MongoDB, and NLP tools like spaCy.
"""


In [26]:
# Function to extract keywords from text
def extract_keywords(text):
    doc = nlp(text)
    return set(
        token.text.lower()
        for token in doc
        if token.pos_ in ("NOUN", "PROPN") and not token.is_stop and token.is_alpha
    )

# Extract keywords from both job description and resume
jd_keywords = extract_keywords(job_description)
resume_keywords = extract_keywords(resume_text)

# Calculate overlap
matched_keywords = jd_keywords & resume_keywords
score = len(matched_keywords) / len(jd_keywords) if jd_keywords else 0

# Display results
print(" Job Description Keywords:", jd_keywords)
print(" Resume Keywords:", resume_keywords)
print(" Matched Keywords:", matched_keywords)
print(f" Similarity Score: {score:.2f}")


 Job Description Keywords: {'python', 'nlp', 'flask', 'knowledge', 'react', 'tools', 'developer'}
 Resume Keywords: {'participation', 'technical', 'html', 'projects', 'stack', 'college', 'software', 'workshop', 'second', 'research', 'participated', 'english', 'higher', 'java', 'person', 'internships', 'views', 'programming', 'systems', 'concepts', 'bangalore', 'strong', 'bachelor', 'team', 'languages', 'algorithms', 'science', 'marketplace', 'data', 'events', 'basic', 'database', 'users', 'present', 'python', 'platform', 'kannada', 'computer', 'hands', 'thinking', 'ai', 'decision', 'mysql', 'frontend', 'management', 'object', 'structures', 'project', 'thrift', 'desktop', 'solutions', 'objective', 'databases', 'hindi', 'mobile', 'online', 'opportunities', 'details', 'mongodb', 'knowledge', 'challenges', 'student', 'development', 'hand', 'web', 'ui', 'products', 'event', 'css', 'education', 'insights', 'technology', 'm', 'clothes', 'soft', 'communication', 'c', 'backend', 'critical', 'ta