Name: Nikhil Rajendra Dhumal

Roll No.: 21102A0009

Class: BE CMPN A

In [2]:
import fitz
import spacy
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
nlp = spacy.load("en_core_web_sm")

In [4]:
skills_list = [
    "JavaScript", "Solidity", "C++", "Python", "React", "Next.js", "Node.js", "Prisma",
    "Ganache", "Docker", "Kubernetes", "AWS", "Kafka", "Monorepos", "Ethereum",
    "PostgreSQL", "MongoDB", "SQL", "Git", "Cloud Computing", "DevOps",
    "Agile Methodology", "HTML", "CSS", "TypeScript", "GraphQL", "REST APIs",
    "Jenkins", "CircleCI", "Terraform", "Ansible", "Redis", "Prometheus",
    "Grafana", "WebSockets", "gRPC", "WebRTC", "Jest", "Mocha", "Truffle",
    "Hardhat", "Azure", "Google Cloud Platform (GCP)"
]

In [5]:
def extract_text_from_pdf(pdf_path):
    with fitz.open(pdf_path) as pdf_doc:
        text = ""
        for page_num in range(pdf_doc.page_count):
            page = pdf_doc.load_page(page_num)
            text += page.get_text()
    return text

In [6]:
def extract_name(doc):
    for ent in doc.ents:
        if ent.label_ == "PERSON":
            return ent.text
    return "Name not found"

In [7]:
def extract_skills(text):
    skills_found = []
    text_lower = text.lower()
    for skill in skills_list:
        if skill.lower() in text_lower:
            skills_found.append(skill)
    return skills_found

In [8]:
def extract_experience(text):
    lines = text.split("\n")
    experience_titles = []
    in_section = False

    for i, line in enumerate(lines):
        line = line.strip()
        if any(term in line.lower() for term in ["projects", "courses", "work"]):
            in_section = True
            continue

        if in_section:
            if "|" in line:
                title = line.split("|")[0].strip()
                experience_titles.append(title)
            elif line.startswith("â€¢"):
                title = line[1:].strip()
                experience_titles.append(title)

    return experience_titles

In [9]:
def extract_resume_info(pdf_path):
    resume_text = extract_text_from_pdf(pdf_path)


    doc = nlp(resume_text)


    name = extract_name(doc)
    skills = extract_skills(resume_text)
    experience = extract_experience(resume_text)

    return {
        "Name": name,
        "Skills": skills,
        "Experience": experience,
        "Resume Text": resume_text
    }

In [10]:
job_descriptions = [
    {
        "title": "Software Developer",
        "description": "We are looking for a skilled software developer with expertise in JavaScript, React, and Node.js."
    },
    {
        "title": "Blockchain Engineer",
        "description": "Experience with Ethereum, Solidity, and smart contract development. Knowledge of decentralized applications and blockchain architecture is essential."
    },
    {
        "title": "DevOps Engineer",
        "description": "Proficient in Docker, Kubernetes, and cloud platforms like AWS or GCP. Experience with CI/CD pipelines is required."
    },
    {
        "title": "Full Stack Developer",
        "description": "Seeking a Full Stack Developer with expertise in JavaScript, React, Next.js, Node.js to design scalable web applications and microservices, ensuring high performance and data consistency."
    },
    {
        "title": "Data Scientist",
        "description": "Strong understanding of machine learning models, NLP, and big data analysis. Experience with Python and SQL is preferred."
    }
]


In [11]:
def vectorize_text(text, job_descriptions):
    vectorizer = TfidfVectorizer()
    job_texts = [job["description"] for job in job_descriptions]
    texts = [text] + job_texts
    tfidf_matrix = vectorizer.fit_transform(texts)
    return tfidf_matrix

In [12]:
def calculate_similarity(tfidf_matrix):
    resume_vector = tfidf_matrix[0]
    job_vectors = tfidf_matrix[1:]
    similarities = cosine_similarity(resume_vector, job_vectors).flatten()
    return similarities

In [13]:
def recommend_jobs(resume_text, job_descriptions):
    tfidf_matrix = vectorize_text(resume_text, job_descriptions)
    similarities = calculate_similarity(tfidf_matrix)
    ranked_jobs = sorted(
        zip(similarities, job_descriptions),
        key=lambda x: x[0],
        reverse=True
    )
    return ranked_jobs

In [14]:
def extract_resume_and_recommend_jobs(pdf_path, job_descriptions):

    resume_info = extract_resume_info(pdf_path)

    recommendations = recommend_jobs(resume_info["Resume Text"], job_descriptions)

    print("Name:", resume_info["Name"])
    print("\nSkills:", ", ".join(resume_info["Skills"]))
    print("\nExperience:\n", "\n".join(resume_info["Experience"]))


    print("\nRecommended Jobs :")
    for score, job in recommendations:
        print(f"{job['title']}: Similarity Score = {score:.2f}")

    return recommendations

In [16]:
pdf_path = "data/Nikhil_Dhumal_MERN_Web_Dev.pdf"
resume_info = extract_resume_and_recommend_jobs(pdf_path, job_descriptions)

Name: Nikhil Dhumal

Skills: JavaScript, Python, React, Node.js, MongoDB, Git

Experience:
 







GitHub: https://github.com/nikhil-dhumal/Movies-Website





Search Usernames: Easily find and add friends by their usernames.
GitHub: https://github.com/nikhil-dhumal/Chat-Harbour



B.E. Computer Engineering

XII (HSC)

X (SSC)

Recommended Jobs :
Full Stack Developer: Similarity Score = 0.25
Software Developer: Similarity Score = 0.18
Data Scientist: Similarity Score = 0.08
Blockchain Engineer: Similarity Score = 0.06
DevOps Engineer: Similarity Score = 0.05
