In [None]:
import pandas as pd
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [None]:
# Google Colab only: open the upload prompt so resume.csv can be placed in the
# runtime's working directory (the recorded cell output shows it being saved).
# `uploaded` keeps whatever files.upload() returns - presumably a mapping of
# uploaded filenames to their contents; confirm against the Colab docs before
# relying on it. Nothing else visible in this notebook reads `uploaded`.
from google.colab import files
uploaded = files.upload()

Saving resume.csv to resume.csv


In [None]:
# Read only the first 200 rows of the uploaded CSV so the models below can be
# tried out quickly, then preview the top of the frame.
df = pd.read_csv(
    'resume.csv',
    nrows=200,
)
df.head(n=5)

Unnamed: 0,ID,Resume_str,Resume_html,Category
0,16852973,HR ADMINISTRATOR/MARKETING ASSOCIATE\...,"<div class=""fontsize fontface vmargins hmargin...",HR
1,22323967,"HR SPECIALIST, US HR OPERATIONS ...","<div class=""fontsize fontface vmargins hmargin...",HR
2,33176873,HR DIRECTOR Summary Over 2...,"<div class=""fontsize fontface vmargins hmargin...",HR
3,27018550,HR SPECIALIST Summary Dedica...,"<div class=""fontsize fontface vmargins hmargin...",HR
4,17812897,HR MANAGER Skill Highlights ...,"<div class=""fontsize fontface vmargins hmargin...",HR


In [None]:
# Keep just the resume text and its label, and discard any row where either
# of the two is missing.
df = df.loc[:, ['Resume_str', 'Category']].dropna()

In [None]:
#Cleaning HTML
def clean_HTML(text):
  """Return ``text`` with anything that looks like an HTML/XML tag removed.

  The input is coerced with ``str()`` first, so non-string values are
  accepted and come back as their string form (minus any tags).

  A "tag" is a ``<``, any run of characters other than ``>``, and the
  closing ``>``. The character class ``[^>]*`` is used instead of ``.*?``
  because ``.`` does not match a newline by default, which meant tags
  spanning several lines were left in the text.

  Args:
    text: Value to clean; converted to ``str`` before matching.

  Returns:
    The string form of ``text`` with every tag-like span deleted.
  """
  return re.sub(r'<[^>]*>', '', str(text))

# Run the tag stripper over every resume and write the result back to the same column.
df['Resume_str'] = df['Resume_str'].map(clean_HTML)

In [None]:
#Encoding
le = LabelEncoder()
df['Category'] = le.fit_transform(df['Category'])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df['Resume_str'],
    df['Category'],
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# --- Features: TF-IDF over the resume text (max_features=5000) ---
# The vectorizer is fitted on the training split only; the test split is
# transformed with that already-fitted vectorizer.
tfif = TfidfVectorizer(max_features=5000)
X_train_tfidf = tfif.fit_transform(X_train)
X_test_tfidf = tfif.transform(X_test)

# --- Model: logistic regression (fit() returns the estimator itself, so
# construction and training are written as one expression) ---
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)

# --- Predictions on the held-out split ---
y_pred = model.predict(X_test_tfidf)

# --- Metrics: overall accuracy, then the per-class report labelled with the
# original category names from the label encoder ---
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, target_names=le.classes_),
)

Accuracy: 0.95

Classification Report:
               precision    recall  f1-score   support

    DESIGNER       1.00      0.89      0.94        18
          HR       0.92      1.00      0.96        22

    accuracy                           0.95        40
   macro avg       0.96      0.94      0.95        40
weighted avg       0.95      0.95      0.95        40



In [None]:
from sklearn.naive_bayes import MultinomialNB

# --- Model: multinomial Naive Bayes trained on the TF-IDF features built in
# the logistic-regression cell (fit() returns the estimator itself) ---
nb_model = MultinomialNB().fit(X_train_tfidf, y_train)

# --- Predictions on the held-out split ---
nb_pred = nb_model.predict(X_test_tfidf)

# --- Metrics: overall accuracy, then the per-class report labelled with the
# original category names from the label encoder ---
print("Accuracy:", accuracy_score(y_test, nb_pred))
print(
    "\nClassification Report:\n",
    classification_report(y_test, nb_pred, target_names=le.classes_),
)

Accuracy: 0.8

Classification Report:
               precision    recall  f1-score   support

    DESIGNER       1.00      0.56      0.71        18
          HR       0.73      1.00      0.85        22

    accuracy                           0.80        40
   macro avg       0.87      0.78      0.78        40
weighted avg       0.85      0.80      0.79        40



In [None]:
from sklearn.svm import LinearSVC

#SVM Model
# Linear support-vector classifier trained on the same TF-IDF features as the
# other models in this notebook.
svm_model = LinearSVC()
svm_model.fit(X_train_tfidf, y_train)

#Predictions
svm_pred = svm_model.predict(X_test_tfidf)

#Evaluation
# Both metrics must be computed from svm_pred. The report was previously given
# nb_pred (the Naive Bayes predictions), so it printed the Naive Bayes numbers
# under the SVM accuracy and the two disagreed.
print("Accuracy:", accuracy_score(y_test, svm_pred))
print("\nClassification Report:\n", classification_report(y_test, svm_pred, target_names=le.classes_))

Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

    DESIGNER       1.00      0.56      0.71        18
          HR       0.73      1.00      0.85        22

    accuracy                           0.80        40
   macro avg       0.87      0.78      0.78        40
weighted avg       0.85      0.80      0.79        40

