In [3]:
pip install pandas spacy matplotlib scikit-learn


Collecting spacy
  Downloading spacy-3.8.5-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.12-cp312-cp312-win_amd64.whl.metadata (2.2 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.11-cp312-cp312-win_amd64.whl.metadata (8.8 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.9-cp312-cp312-win_amd64.whl.metadata (2.2 kB)
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Downloading thinc-8.3.6-cp312-cp312-win_amd64.whl.metadata (15 kB)
Collecting wasabi<1.2.0,>=0.9.1 (from spacy)
  Downloading wasabi-1.1.3-py3-none-any.whl.metadata (28 kB)
Collecting srsly<3.0.0,>=2.4.3 (from spacy)
  Downloading srsly-2.5.1-cp312-cp312-win_amd6

In [5]:
# Download the "en_core_web_sm" spaCy model that spacy.load() reads in a later cell.
# NOTE(review): `!python` is resolved by the shell — confirm it is the same
# interpreter/environment as the notebook kernel.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
      --------------------------------------- 0.3/12.8 MB ? eta -:--:--
     - -------------------------------------- 0.5/12.8 MB 1.3 MB/s eta 0:00:10
     --- ------------------------------------ 1.0/12.8 MB 2.0 MB/s eta 0:00:06
     ---- ----------------------------------- 1.6/12.8 MB 2.1 MB/s eta 0:00:06
     ----- ---------------------------------- 1.8/12.8 MB 2.1 MB/s eta 0:00:06
     ------- -------------------------------- 2.4/12.8 MB 2.0 MB/s eta 0:00:06
     --------- ------------------------------ 2.9/12.8 MB 2.1 MB/s eta 0:00:05
     ---------- ----------------------------- 3.4/12.8 MB 2.1 MB/s eta 0:00:05
     ------------ --------------------------- 3.9/12.8 MB 2.2 MB/s eta 0:00:05
     ------------- -------------------------- 4.

In [6]:
# 📦 Imports
import re

import pandas as pd
import spacy
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# Load the spaCy pipeline named "en_core_web_sm"; the resulting `nlp` object is
# called on the resume text further down to produce `doc`.
nlp = spacy.load("en_core_web_sm")

In [9]:
# 📝 Sample Resume (can be read from file later)
# Raw resume text that every later cell analyses (hard-coded for this demo).
# NOTE(review): the text includes a personal name — confirm it may be shared.
# NOTE(review): the separator in project lines 2-4 looks like a mis-decoded
# dash; it is runtime text, so it is left untouched here.
resume_text = """
NIKHIL .M. SUTAR
Innovative developer with expertise in C, Java, Python, and JavaScript. Proficient in utilizing frameworks such
as Flask, Django, and Bootstrap, with ongoing learning in React.js. Experienced in version control with Git and
GitHub and adept at deploying projects on Vercel. Committed to leveraging AI tools, including ChatGPT, to
enhance project outcomes.

Languages : C , Java , Python , Javascript, Sql
Frontend : Bootstrap , React.js
Backend : Node.js , Flask , Django
Python Libraries : Pandas , Numpy , Matplotlib, Seaborn
Artificial Intelligence : Scikit-Learn, Machine Learning, LLMs , Hugging Face , Transformers

Projects:
1. SaleGyaan : AI-Based E-commerce Sales Prediction - Flask, Python, ML
2. Success Classes of Engineering ‚Äì Flask, HTML, CSS
3. Pranav Interiors ‚Äì Django, PostgreSQL
4. Portfolio Website ‚Äì Tailwind, HTML, JS

EDUCATION:
Artificial Intelligence and Data Science | CGPA: 6.70 | 2023 - 2026
"""

In [10]:
# 🔍 NLP Processing
# Run the loaded spaCy pipeline over the resume; `doc.ents` is read by the
# entity-extraction cell that follows.
doc = nlp(resume_text)

In [11]:
# Entity-based extraction (rough first pass): keep the text of every spaCy
# entity labelled ORG or PRODUCT.
# NOTE(review): `skills` is not read by any cell visible here — confirm it is
# still needed.
skills = [ent.text for ent in doc.ents if ent.label_ in ("ORG", "PRODUCT")]


def _mentions(keyword, text):
    """Return True if `keyword` occurs in `text` as a whole word, ignoring case.

    A plain substring test reports "C" for any text containing the letter c
    and "Java" for text that only mentions "JavaScript". Here the keyword must
    not be glued to other word characters; a trailing plural "s" is tolerated
    so "LLM" still matches "LLMs", and any run of whitespace is accepted
    between the words of a multi-word keyword (e.g. a line break inside
    "Machine Learning").
    """
    body = r"\s+".join(re.escape(part) for part in keyword.split())
    return re.search(rf"(?<!\w){body}s?(?!\w)", text, flags=re.IGNORECASE) is not None


# Keyword-based extraction against a fixed skill vocabulary; the result keeps
# the order and spelling of `keywords`.
keywords = ['Python', 'Java', 'C', 'JavaScript', 'Django', 'Flask', 'React', 'Machine Learning', 'LLM', 'SQL']
detected_skills = [word for word in keywords if _mentions(word, resume_text)]

print("‚úÖ Detected Skills:", detected_skills)

‚úÖ Detected Skills: ['Python', 'Java', 'C', 'JavaScript', 'Django', 'Flask', 'React', 'Machine Learning', 'LLM', 'SQL']


In [12]:
# 🎯 Career Role Template Database
# Maps each career role to the list of skills expected for it.
# NOTE(review): some spellings here ("ML", "LLMs") differ from the keyword list
# used for detection ("Machine Learning", "LLM"); the scoring code compares
# exact strings, so those entries cannot match — confirm whether intended.
roles = {
    "Machine Learning Engineer": [
        "Python",
        "Scikit-Learn",
        "Pandas",
        "Numpy",
        "ML",
        "Hugging Face",
        "LLM",
    ],
    "Full Stack Developer": [
        "HTML",
        "CSS",
        "JavaScript",
        "React",
        "Django",
        "Flask",
        "SQL",
    ],
    "AI Research Assistant": [
        "Python",
        "Transformers",
        "LLMs",
        "Deep Learning",
        "NLP",
    ],
}

In [13]:
# 🔗 Similarity Scoring
def match_score(user_skills, role_skills):
    """Return the fraction of a role's distinct skills that the user has.

    Args:
        user_skills: Iterable of skill names the user has.
        role_skills: Iterable of skill names the role requires.

    Returns:
        A float in [0.0, 1.0]. Names are compared as exact strings. A role
        with no required skills scores 0.0 instead of raising
        ZeroDivisionError, and repeated entries in `role_skills` are counted
        once so they cannot deflate the score.
    """
    required = set(role_skills)
    if not required:
        # Nothing to match against; avoid dividing by zero.
        return 0.0
    return len(required & set(user_skills)) / len(required)

# One (role name, match percentage rounded to 2 decimals) pair per role,
# in the iteration order of `roles`.
suggestions = [
    (role_name, round(match_score(detected_skills, required) * 100, 2))
    for role_name, required in roles.items()
]

In [14]:
# 🔝 Top Suggested Careers
# Tabulate the (role, score) pairs and rank them from best to worst match.
career_df = pd.DataFrame(suggestions, columns=["Career Role", "Match %"]).sort_values(
    by="Match %", ascending=False
)
print("\nüîÆ Top Career Suggestions:\n")
print(career_df)


üîÆ Top Career Suggestions:

                 Career Role  Match %
1       Full Stack Developer    71.43
0  Machine Learning Engineer    28.57
2      AI Research Assistant    20.00


In [15]:
# 📉 Skill Gap Analysis
def skill_gap(role, user_skills):
    """Return the skills required by `role` that are absent from `user_skills`.

    Args:
        role: Key into the module-level `roles` mapping.
        user_skills: Iterable of skill names the user has.

    Returns:
        A list of the missing skills, each listed once, in the order the role
        declares them. (Building the list from a set difference gave an
        arbitrary order that could change between runs.)

    Raises:
        KeyError: If `role` is not a key of `roles`.
    """
    known = set(user_skills)
    # dict.fromkeys drops repeated entries while keeping first-seen order.
    return [skill for skill in dict.fromkeys(roles[role]) if skill not in known]

# Analyse the role in the first row of `career_df` (the top-ranked match after
# the descending sort) and report which of its skills were not detected.
selected_role = career_df["Career Role"].iloc[0]
gaps = skill_gap(role=selected_role, user_skills=detected_skills)

print(f"\nüß© Skill Gaps for '{selected_role}':")
print(gaps)


üß© Skill Gaps for 'Full Stack Developer':
['HTML', 'CSS']
