### Importing Libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
plt.rcParams['font.size']=20

In [3]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
stopwords=set(STOP_WORDS)
from spacy.lang.en import English
parser=English()
import string
punctuations=string.punctuation

In [4]:
# Load the pickled mapping from class id to category name.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("label_names.pkl","rb") as file:
    label_names=pickle.load(file)

In [5]:
print(label_names)

{1: 'Data Science', 4: 'HR', 12: 'Web Designing', 7: 'Mechanical Engineer', 10: 'Sales', 6: 'Java Developer', 8: 'Operations Manager', 9: 'Python Developer', 2: 'DevOps Engineer', 5: 'Hadoop', 3: 'ETL Developer', 0: 'Blockchain', 11: 'Testing'}


### Model Loading

In [6]:
# Load the pickled classifier whose predict() is called further down.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("model/RF_model.pkl","rb") as file:
    model=pickle.load(file)

### Data Cleaning

In [7]:
nlp=spacy.load("en_core_web_sm")

In [8]:
def preprocess_text(docx):
    """Tokenise ``docx`` and return its cleaned, lower-cased word list.

    Each token produced by ``parser(docx)`` is replaced by its lemma
    (lower-cased and stripped); a token whose lemma is the ``"-PRON-"``
    placeholder keeps its lower-cased surface form instead. A word is kept
    only if it is not in ``stopwords``, is not found in ``punctuations``,
    is longer than three characters and is purely alphabetic.

    Returns:
        list: the surviving words, in document order.
    """
    cleaned=[]
    for token in parser(docx):
        if token.lemma_ == "-PRON-":
            word=token.lower_
        else:
            word=token.lemma_.lower().strip()
        # Same filters as before, applied to one word at a time.
        if word in stopwords or word in punctuations:
            continue
        if len(word)>3 and word.isalpha():
            cleaned.append(word)
    return cleaned

In [9]:
# Load the pickled tokenizer used below to turn words into integer ids.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
with open("model/tokens.pkl","rb") as file:
    tok=pickle.load(file)

## User Input

In [308]:
input_string=input()

collaborative communication marketing sets entrepreneur basically cohesive manage meet cold science business management


In [309]:
input_string

'collaborative communication marketing sets entrepreneur basically cohesive manage meet cold science business management'

In [310]:
# NOTE(review): nlp(input_string) is handed to preprocess_text(), which calls
# parser() on whatever it receives — confirm the installed spaCy version
# accepts an already-processed document there rather than only raw text.
cleaned_text_data=preprocess_text(nlp(input_string))

In [311]:
print(cleaned_text_data)

['collaborative', 'communication', 'marketing', 'entrepreneur', 'basically', 'cohesive', 'manage', 'meet', 'cold', 'science', 'business', 'management']


### Text Encoding and Padding

In [312]:
encd_text=tok.texts_to_sequences([cleaned_text_data])
print(encd_text)

[[501, 30, 67, 503, 174, 504, 10, 65, 177, 28, 11, 6]]


In [313]:
# NOTE(review): this import sits mid-notebook; the cell must run before
# pad_sequences is used anywhere else.
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Pad the encoded sequence with trailing zeros ("post") up to 100 entries.
# NOTE(review): 100 presumably matches the input length the model was
# trained with — TODO confirm.
pad_text=pad_sequences(sequences=encd_text,maxlen=100,padding="post")
print(pad_text)

[[501  30  67 503 174 504  10  65 177  28  11   6   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0]]


### Model Prediction

In [314]:
prediction=model.predict(pad_text)

In [315]:
prediction

array([10])

In [316]:
# Translate the first predicted class id into its category name and report it.
label=label_names[prediction[0]]
print(f"Predicted Class is --------------> {label}")

Predicted Class is --------------> Sales


In [163]:
test_data=pd.read_csv("input/ResumeData.csv")

In [51]:
test_data.head()

Unnamed: 0,Resume_ID,Category,Resume
0,1001,Data Science,Skills * Programming Languages: Python (pandas...
1,1002,Data Science,Education Details \r\nMay 2013 to May 2017 B.E...
2,1003,Data Science,"Areas of Interest Deep Learning, Control Syste..."
3,1004,Data Science,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...
4,1005,Data Science,"Education Details \r\n MCA YMCAUST, Faridab..."


In [52]:
# Keep only the resumes whose category equals the predicted label.
# The explicit copy makes need_data independent of test_data, so later changes
# to need_data (index reset, added columns) touch this frame only and do not
# raise pandas' SettingWithCopyWarning (which this notebook silences anyway).
need_data=test_data.loc[test_data["Category"]==label].copy()

In [64]:
# Rebind instead of resetting in place: the cell stays safe to re-run and
# need_data becomes a fresh frame with a 0..n-1 index rather than a mutated
# slice of test_data.
need_data=need_data.reset_index(drop=True)

In [65]:
need_data.shape[0]

44

### Cosine Similarity

In [66]:
from sklearn.feature_extraction.text import CountVectorizer

def cosine_similarity(x, y):
    """Return the cosine similarity between two 1-D vectors.

    Parameters
    ----------
    x, y : array-like of numbers
        Vectors of equal length. Lists and tuples are accepted as well as
        NumPy arrays.

    Returns
    -------
    float or None
        ``dot(x, y) / (||x|| * ||y||)``.
        ``None`` when the two lengths differ.
        ``0.0`` when either vector has zero magnitude; the ratio is undefined
        there and previously came back as NaN.
    """
    # Ensure length of x and y are the same
    if len(x) != len(y):
        return None

    # Work on float arrays so plain sequences are supported and squaring
    # integer counts cannot overflow.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Product of the L2 norms (magnitudes) of x and y
    magnitude_product = np.sqrt(np.sum(x**2)) * np.sqrt(np.sum(y**2))
    if magnitude_product == 0:
        # An all-zero vector has no direction; report "no similarity".
        return 0.0

    return np.dot(x, y) / magnitude_product

In [67]:
input_data=pd.DataFrame([input_string],columns=["input"])

In [68]:
input_data

Unnamed: 0,input
0,software bachelor management payroll settlemen...


In [69]:
input_data.shape

(1, 1)

In [72]:
# Score every candidate resume against the query text. Each pair gets its own
# CountVectorizer fit, so the two count vectors share a vocabulary built from
# just those two documents.
query_text=input_data["input"].iloc[0]  # loop-invariant, so looked up once
s_values=[]
# Iterate over the resume texts themselves rather than index labels, so the
# loop works whether or not need_data's index has been reset to 0..n-1.
for resume_text in need_data["Resume"]:
    X = CountVectorizer().fit_transform([query_text, resume_text]).toarray()
    s_values.append(cosine_similarity(X[0], X[1]))

In [73]:
need_data["probability_score"]=s_values

In [74]:
sorted_data = need_data.sort_values(["probability_score"], ascending=False)

In [75]:
sorted_data=sorted_data.reset_index(drop=True).head(10)

In [76]:
ID_list=list(sorted_data["Resume_ID"])

In [77]:
print(ID_list)

[1064, 1042, 1075, 1053, 1059, 1070, 1081, 1048, 1084, 1051]


### Loading User Info Data

In [78]:
hist=pd.read_csv('input/info.csv')

In [79]:
hist.shape

(962, 10)

In [80]:
hist.head()

Unnamed: 0,Resume_ID,City,State,Country,ZipCode,DegreeType,GraduationDate,WorkHistoryCount,TotalYearsExperience,CurrentlyEmployed
0,1001,Arlington,TX,US,76013,High School,01-01-2006,3,6,Yes
1,1002,Des Moines,IA,US,50320,Associate's,01-02-2010,4,9,Yes
2,1003,Greenville,SC,US,29611,Bachelor's,01-05-2012,6,15,Yes
3,1004,Richboro,PA,US,18954,Bachelor's,01-01-1992,4,19,Yes
4,1005,Baltimore,MD,US,21239,Bachelor's,01-12-2012,4,9,No


### Extracting records for the model-predicted class

In [81]:
# Collect, in ranking order, the user-info rows for each shortlisted resume.
# One boolean-mask lookup per ID replaces the row-by-row scan of hist, which
# re-filtered hist for every matching row and therefore appended the same rows
# several times whenever a Resume_ID occurred more than once.
final_result=[]
for id_ in ID_list:
    result=hist.loc[hist["Resume_ID"]==id_]
    # IDs with no entry in hist contribute nothing, as before.
    if not result.empty:
        final_result.append(result)

In [82]:
fr=pd.concat(objs=final_result).reset_index(drop=True)

In [83]:
# Attach the similarity score and category from the ranked resumes.
# NOTE(review): pandas pairs the rows of this assignment by index label, so it
# is only correct if fr and sorted_data list the same Resume_IDs in the same
# order — TODO confirm (e.g. no ID missing from, or duplicated in, hist).
fr[["probability_score","Category"]]=sorted_data[["probability_score","Category"]]

In [84]:
# Drop the graduation/degree/work-history columns from the table shown below.
final_result=fr.drop(columns=["GraduationDate","DegreeType","WorkHistoryCount"])

In [85]:
final_result

Unnamed: 0,Resume_ID,City,State,Country,ZipCode,TotalYearsExperience,CurrentlyEmployed,probability_score,Category
0,1064,Derby,KS,US,67037,7,Yes,0.280957,HR
1,1042,Lafayette,LA,US,70503,5,No,0.280957,HR
2,1075,Fort Worth,TX,US,76108,7,Yes,0.280957,HR
3,1053,Augusta,GA,US,30901,0,No,0.280957,HR
4,1059,Ecorse,MI,US,48229,8,Yes,0.069474,HR
5,1070,Euless,TX,US,76039,7,Yes,0.069474,HR
6,1081,Saginaw,TX,US,76179,21,No,0.069474,HR
7,1048,Bonner Springs,KS,US,66012,6,Yes,0.069474,HR
8,1084,Lexington,KY,US,40517,5,No,0.04714,HR
9,1051,Washington,DC,US,20012,5,Yes,0.04714,HR


## Thank You