In [5]:
#spacy
import spacy
from spacy.pipeline import EntityRuler
from spacy.lang.en import English
from spacy.tokens import Doc

#gensim
import gensim
from gensim import corpora

#Visualization
from spacy import displacy
# import pyLDAvis
# import pyLDAvis.gensim
from wordcloud import WordCloud
import plotly.express as px
import matplotlib.pyplot as plt

#Data loading/ Data manipulation
import pandas as pd
import numpy as np
import jsonlines

#nltk
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# nltk.download(['stopwords','wordnet'])

#warning
import warnings 
warnings.filterwarnings('ignore')

In [6]:
# Fixed seed so the shuffle -- and therefore the 200-row sample that every
# later cell works on -- is identical on each Restart & Run All.
SEED = 42

df = pd.read_csv("../Dataset/FilteredResumeDataSet.csv")
# Shuffle the rows with a seeded generator; original index labels are kept.
df = df.reindex(np.random.default_rng(SEED).permutation(df.index))
# Independent copy of the first 200 shuffled rows, so adding columns to
# `data` later never touches `df`.
data = df.iloc[0:200].copy()
data.head()

Unnamed: 0.1,Unnamed: 0,Category,Resume,cleaned_resume
68,824,DotNet Developer,TECHNICAL SKILLS â¢ Web Technologies: ASP .NE...,TECHNICAL SKILLS Web Technologies ASP NET HTML...
22,328,Java Developer,"Computer Skills: Languages And Script: JSP, Se...",Computer Skills Languages And Script JSP Servl...
53,710,Database,Technical Expertise Operating Systems Microsof...,Technical Expertise Operating Systems Microsof...
17,322,Java Developer,TECHNICAL STRENGTHS Computer Language Java/J2E...,TECHNICAL STRENGTHS Computer Language Java J2E...
69,825,DotNet Developer,Participated in intra college cricket competit...,Participated in intra college cricket competit...


In [7]:
# import spacy.cli
# spacy.cli.download("en_core_web_lg")
# JSONL file that the entity ruler's patterns are loaded from.
skill_pattern_path = "../Dataset/jz_skill_patterns.jsonl"
# Load the "en_core_web_lg" pipeline; it must already be installed
# (see the commented-out download call above).
nlp = spacy.load("en_core_web_lg")

In [8]:
# Add an entity ruler component to the pipeline and load its patterns from
# the file at `skill_pattern_path`.
ruler = nlp.add_pipe("entity_ruler")
ruler.from_disk(skill_pattern_path)
# Show the pipeline's component names to confirm the ruler was registered.
nlp.pipe_names

['tok2vec',
 'tagger',
 'parser',
 'attribute_ruler',
 'lemmatizer',
 'ner',
 'entity_ruler']

In [9]:
def get_skills(text):
    """Return the text of every entity labelled ``SKILL`` found in ``text``.

    Args:
        text: String to run through the module-level ``nlp`` pipeline.

    Returns:
        list[str]: Entity texts in document order. Duplicates are kept;
        pass the result to ``unique_skills`` to drop them.
    """
    doc = nlp(text)
    return [ent.text for ent in doc.ents if ent.label_ == "SKILL"]


def unique_skills(x):
    """Return the distinct items of ``x`` as a list, in first-seen order.

    ``dict.fromkeys`` is used instead of ``set`` so the result order is
    deterministic: iterating a set of strings can yield a different order
    on every interpreter run, which made the displayed skill lists
    irreproducible.

    Args:
        x: Iterable of hashable items (here: skill strings).

    Returns:
        list: Each distinct item once, ordered by first occurrence.
    """
    return list(dict.fromkeys(x))

In [10]:
# Everything that gets blanked out of a resume: @mentions, any character that
# is not alphanumeric / space / tab, URLs, a leading "rt", and runs starting
# at "http" up to the next double quote.
# Raw string so the regex escapes (\w, \/, \S) are not parsed as invalid
# Python string escapes.
noise_pattern = re.compile(
    r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?"'
)
# Built once: the stop-word set and the lemmatizer do not change between
# resumes, so there is no reason to rebuild them per word / per row.
stop_words = set(stopwords.words("english"))
lm = WordNetLemmatizer()

clean = []
for resume_text in data["cleaned_resume"]:
    # Strip noise, lowercase, and tokenize on whitespace.
    words = noise_pattern.sub(" ", resume_text).lower().split()
    # Drop stop words, lemmatize what is left, and re-join into one string.
    clean.append(
        " ".join(lm.lemmatize(word) for word in words if word not in stop_words)
    )

In [11]:
# Attach the cleaned text, then derive a de-duplicated skill list per resume.
data["Clean_Resume"] = clean
extracted_skills = data["Clean_Resume"].str.lower().apply(get_skills)
data["skills"] = extracted_skills.apply(unique_skills)
data.head()

Unnamed: 0.1,Unnamed: 0,Category,Resume,cleaned_resume,Clean_Resume,skills
68,824,DotNet Developer,TECHNICAL SKILLS â¢ Web Technologies: ASP .NE...,TECHNICAL SKILLS Web Technologies ASP NET HTML...,technical skill web technology asp net html cs...,"[asp net, javascript, database, bootstrap, jqu..."
22,328,Java Developer,"Computer Skills: Languages And Script: JSP, Se...",Computer Skills Languages And Script JSP Servl...,computer skill language script jsp servlet htm...,"[ajax, operating system, java, apache tomcat, ..."
53,710,Database,Technical Expertise Operating Systems Microsof...,Technical Expertise Operating Systems Microsof...,technical expertise operating system microsoft...,"[operating system, oracle, web server, testing..."
17,322,Java Developer,TECHNICAL STRENGTHS Computer Language Java/J2E...,TECHNICAL STRENGTHS Computer Language Java J2E...,technical strength computer language java j2ee...,"[operating system, java, server, swift, oracle..."
69,825,DotNet Developer,Participated in intra college cricket competit...,Participated in intra college cricket competit...,participated intra college cricket competition...,"[asp net, javascript, server]"


In [12]:
# Number of resumes per job category, bars ordered from most to least frequent.
fig = px.histogram(data, x="Category", title="Distribution of Jobs Categories")
fig.update_xaxes(categoryorder="total descending")
fig.show()

In [13]:
# Every category present in the sample, plus the "ALL" sentinel that the
# skill-distribution cell treats as "no category filter".
Job_cat = np.append(data["Category"].unique(), "ALL")

In [14]:
Job_Category = 'Data Science'

# Choose which resumes contribute: all of them for "ALL", otherwise only the
# rows whose Category equals the selected one.
if Job_Category == "ALL":
    fltr = data["skills"]
else:
    fltr = data[data["Category"] == Job_Category]["skills"]

# Flatten the per-resume skill lists into one list of skill mentions.
Total_skills = []
for skill_list in fltr:
    Total_skills.extend(skill_list)

fig = px.histogram(
    x=Total_skills,
    labels={"x": "Skills"},
    title=f"{Job_Category} Distribution of Skills",
)
fig.update_xaxes(categoryorder="total descending")
fig.show()

In [15]:
# Run the pipeline on the first cleaned resume of the sample and render its
# entities inline in the notebook.
sent = nlp(data["Clean_Resume"].iloc[0])
displacy.render(sent, style="ent", jupyter=True)

In [17]:
# Register every job category name as a string pattern labelled
# "Job-Category". Categories come from the full dataset (`df`), not only the
# 200-row sample. All patterns are passed in a single add_patterns call
# instead of one call per category, so the ruler is updated once.
patterns = df.Category.unique()
ruler.add_patterns(
    [{"label": "Job-Category", "pattern": category} for category in patterns]
)

In [18]:
# options=[{"ents": "Job-Category", "colors": "#ff3232"},{"ents": "SKILL", "colors": "#56c426"}]
# Background colour (or CSS gradient) for each entity label to highlight.
# The key order here is also the order of the "ents" list built below.
colors = {
    "Job-Category": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
    "SKILL": "linear-gradient(90deg, #9BE15D, #00E3AE)",
    "ORG": "#ffd966",
    "PERSON": "#e06666",
    "GPE": "#9fc5e8",
    "DATE": "#c27ba0",
    "ORDINAL": "#674ea7",
    "PRODUCT": "#f9cb9c",
}
# displaCy options: render exactly the labels that have a colour assigned.
options = {"ents": list(colors), "colors": colors}
# Run the pipeline on the sixth row's "Resume" column (not the cleaned text)
# and render it with the label filter and colours defined in `options`.
sent = nlp(data["Resume"].iloc[5])
displacy.render(sent, style="ent", jupyter=True, options=options)

In [19]:
# Sample resume text used as input for the rendering and matching cells below.
# NOTE(review): this literal appears to contain a real person's name, phone
# number, email address and profile URL -- confirm it is safe to publish, or
# replace it with synthetic data before sharing the notebook.
input_resume = '''Nilesh Dhondiba Virkar
Balkum pada no 3 , Thane (W)
9137858211
Nileshvirkar2000@gmail.com
https://www.linkedin.com/in/niles
h-virkar-37118b246
Education
A.P Shah Institute of Technology / Bachelor of Engineering 2019-2023
In Information Technology with 8.21 CGPA (Pursuing)
Government Polytechnic Thane(Diploma IT) 2016-2019
Diploma in Information Technology with an aggregate of 66.18%
Shivai vidya mandir (10th) 2015-2016
Graduated School with an aggregate of 70%
Skills
Programming Language Known: Python, C++, C, Java.
Framework: react.Js
Database: MySQL.
Language Known: Marathi, English, Hindi 
Projects
Chatbot for Healthcare using Machine Learning – Academic Project
A chatbot which replies according to the user's question by predicting the reply from pre-trained data. Machine Learning Code was done by using Python, Flask, Numpy, and NLT libraries with PyTorch.
Blood Bank Management System – Academic Project -
A website for blood donation and requesting blood, the front end was made using reactjs and the backend with Mysql as the database.
VIRTUAL INTERNSHIPS
APSIT SKILLS INTERNSHIP. 2020
AICTE Internships
Cyber security Foundation. 2021
Palo Alto Networks, Cyber security Academy
Cloud Architecting. 2022
AWS, AWS Academy
Certificates
C for everyone 2020
Coursera, Project Network
Database Management Essentials 2020
Coursera, University of Colorado
Python Data Structures 2020
Coursera, University of Michigan
Cyber security Foundation 2021
Palo Alto Networks, Cyber security Academy
Cloud Foundations 2021
AWS, AWS Academy
IoT Fundamentals: Connecting Things 2022
Cisco, Cisco Networking Academy
Cloud Architecting 2022
AWS, AWS Academy
Networking Essentials 2022
Cisco, Cisco Networking Academy
Extra-curricular
SAP Institute Name: Government Polytechnic Thane,
I have learnt basic introduction to SAP through this workshop
Inter-college sports and cultural committee coordinator
Inter-college sports committee team manager – IT Department
Inter-college cultural event security head
Interest
Web Development
Fitness freak
Kabbadi
'''
# Run the pipeline on the sample resume as written (original casing) and
# render its entities with the label filter and colours from `options`.
sent2 = nlp(input_resume)
displacy.render(sent2, style="ent", jupyter=True, options=options)

In [20]:
input_skills = '''Python, Java, C'''
# Split on commas AND strip surrounding whitespace: a bare split(",") leaves
# entries such as " java" and " c", which can never equal an extracted skill,
# so only the first requirement was ever able to match. Empty entries (e.g.
# from a trailing comma) are dropped.
req_skills = [
    skill.strip() for skill in input_skills.lower().split(",") if skill.strip()
]
resume_skills = unique_skills(get_skills(input_resume.lower()))

# Count how many required skills were extracted from the resume.
resume_skill_set = set(resume_skills)
score = sum(1 for skill in req_skills if skill in resume_skill_set)
req_skills_len = len(req_skills)
# Guard against an empty requirement list instead of dividing by zero.
match = round(score / req_skills_len * 100, 1) if req_skills_len else 0.0

print(f"The current Resume is {match}% matched to your requirements")

The current Resume is 33.3% matched to your requirements


In [21]:
# Show the skills extracted from the sample resume, i.e. the values the
# required skills were compared against when computing the match score.
print(resume_skills)

['python', 'flask', 'data structures', 'machine learning', 'mysql', 'security', 'framework', 'programming language', 'database', 'engineering', 'numpy', 'front end', 'libraries']
