# Imported libraries

In [1]:
import spacy
import nlp
import PyPDF2
import textract
import matplotlib.pyplot as plt
import re
import docx
import nltk
from spacy import displacy
from textblob import TextBlob
from collections import Counter
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Classification of content

In [2]:
# Define the categories to classify the content
categories = ['Skills', 'Education', 'Experience', 'Certifications']

# Initialize a dictionary to store the classified content
classified_content = {category: [] for category in categories}

# Read the Resume file

In [3]:
file_name=input("Enter the name of the resume file : ")
file_extension=file_name.split(".")[-1]

Enter the name of the resume file : GOKUL_PRASANTH'S_RESUME gokul63808@gmail.com.pdf


# Read the resume content

In [4]:
file_name = input("Enter the name of the resume file: ")
file_extension = file_name.split(".")[-1]

if file_extension == "txt":
    with open(file_name, 'r') as f:
        resume_content = f.read()
elif file_extension == "docx":
    doc = docx.Document(file_name)
    resume_content = ""
    for para in doc.paragraphs:
        resume_content += para.text
elif file_extension == "pdf":
    pdf_file = open(file_name, 'rb')
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    resume_content = ""
    for page in range(len(pdf_reader.pages)):
        resume_content += pdf_reader.pages[page].extract_text()
else:
    print("Invalid file format. Only txt, docx and pdf files are supported.")
    exit()

Enter the name of the resume file: GOKUL_PRASANTH'S_RESUME gokul63808@gmail.com.pdf


# Model Building

In [5]:
# Load the spacy model
nlp = spacy.load('en_core_web_sm')

# Process the resume content using the spacy model
doc = nlp(resume_content)

# Iterate through the entities in the resume and classify them
for ent in doc.ents:
    if ent.label_ == 'Skills':
        classified_content['Skills'].append(ent.text)
    elif ent.label_ == 'Education':
        classified_content['Education'].append(ent.text)
    elif ent.label_ == 'Professional Experience':
        classified_content['Experience'].append(ent.text)
    elif ent.label_ == 'Certifications':
        classified_content['Certifications'].append(ent.text)

# Print the classified content
print("Classified Content:")
print(classified_content)


Classified Content:
{'Skills': [], 'Education': [], 'Experience': [], 'Certifications': []}


# Model Evaluation

In [6]:
# Get the top 10 most common words in the resume
stop_words = set(stopwords.words("english"))
words = word_tokenize(resume_content)
filtered_words = [word for word in words if word.lower() not in stop_words]
word_freq = Counter(filtered_words)
top_10_words = word_freq.most_common(20)

# Print the top 10 most common words and their alternatives
print("\nTop 20 Words and their Alternatives:")
for word, freq in top_10_words:
    print(f"{word} - {nlp(word).similarity(nlp(word))}")
blob=TextBlob(resume_content)
print(blob.sentiment.polarity)


Top 20 Words and their Alternatives:
, - 1.0
. - 1.0
• - 1.0
 - 1.0
- - 1.0
data - 1.0
IoT - 1.0
: - 1.0
Data - 1.0
( - 1.0
) - 1.0
Intern - 1.0
learned - 1.0
2022 - 1.0
Artificial - 1.0
Intelligence - 1.0
Remote - 1.0
Training - 1.0
computer - 1.0
science - 1.0
0.031105169340463455


# Sentiment Analysis

In [7]:
def sentiment_scores(resume):
 
    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SentimentIntensityAnalyzer()
 
    # polarity_scores method of SentimentIntensityAnalyzer
    # object gives a sentiment dictionary.
    # which contains pos, neg, neu, and compound scores.
    sentiment_dict = sid_obj.polarity_scores(resume_content)
     
    print("Overall sentiment dictionary is : ", sentiment_dict)
    print("Resume was rated as ", round(sentiment_dict['neg']*100,2), "% Negative")
    print("Resume was rated as ", round(sentiment_dict['neu']*100,2), "% Neutral")
    print("Resume was rated as ", round(sentiment_dict['pos']*100,2), "% Positive")
 
    print("Resume Overall Rated As", end = " ")
 
    # decide sentiment as positive, negative and neutral
    if sentiment_dict['compound'] >= 0.05 :
        print("Positive")
 
    elif sentiment_dict['compound'] <= - 0.05 :
        print("Negative")
 
    else :
        print("Neutral")
sentiment_scores(resume_content)

Overall sentiment dictionary is :  {'neg': 0.011, 'neu': 0.873, 'pos': 0.115, 'compound': 0.9962}
Resume was rated as  1.1 % Negative
Resume was rated as  87.3 % Neutral
Resume was rated as  11.5 % Positive
Resume Overall Rated As Positive
