# Detects rejection emails among the responses to job applications.
 - Useful when integrated with a Gmail account to automate the handling of application responses.

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Load the e-mail dataset from the Kaggle input directory and preview the
# last rows of the resulting frame.
csv_path = '/kaggle/input/application-rejection-emails/Rejection Data - Sheet1.csv'
df = pd.read_csv(csv_path)
df.tail()

In [None]:
import string
import matplotlib.pyplot as plt

# Normalise the raw e-mail bodies before vectorising: lowercase everything,
# drop punctuation and digits, and turn line breaks into spaces.
# Line breaks are replaced rather than deleted so that the last word of one
# line is not glued to the first word of the next one (deleting the break in
# "forward<newline>with" would produce the single token "forwardwith").
_EMAIL_CLEANUP = str.maketrans('\n\r', '  ', string.punctuation + string.digits)


def clean_email(text):
    """Return *text* lowercased, without punctuation or digits, on one line."""
    return text.lower().translate(_EMAIL_CLEANUP)


# One pass over the column instead of a separate pass per clean-up step.
df.Email = df.Email.apply(clean_email)

# Show how many e-mails fall into each Status class.
df.Status.value_counts().plot(kind='bar')
plt.show()

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

# Encode the Status labels as integers.
encoder = LabelEncoder()
y = encoder.fit_transform(df.Status)

# Split the raw e-mail text *before* building the vocabulary, so the held-out
# e-mails cannot influence the features the models are trained on.
# random_state makes the split reproducible between runs; stratify keeps the
# class proportions the same in the train and test sets.
emails_train, emails_test, y_train, y_test = train_test_split(
    df.Email, y, test_size=0.2, random_state=42, stratify=y
)

# Learn the bag-of-words vocabulary from the training e-mails only, then
# reuse it unchanged for the test e-mails.
vectorizer = CountVectorizer()
x_train = vectorizer.fit_transform(emails_train)
x_test = vectorizer.transform(emails_test)

# Whole corpus in the same feature space, kept for code that still expects `x`.
x = vectorizer.transform(df.Email)

# Shape of sets
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

In [None]:
%%time
# Train a multinomial Naive Bayes model on the training split.
nb = MultinomialNB()
nb.fit(X=x_train, y=y_train)

In [None]:
def predict_reject(email):
    """Classify a single e-mail body with the fitted Naive Bayes model.

    The text is lowercased and stripped of punctuation and digits before it
    is vectorised, mirroring the clean-up applied to the training e-mails;
    without it the tokens seen at prediction time would not match the
    vocabulary learned from the cleaned training text.

    Args:
        email: Raw e-mail text.

    Returns:
        'reject' or 'not-reject' for the two known labels; any other label
        is returned exactly as decoded by the label encoder.
    """
    category_names = {'reject': 'reject', 'not_reject': 'not-reject'}
    # Line breaks become spaces so neighbouring words stay separate tokens.
    cleanup = str.maketrans('\n\r', '  ', string.punctuation + string.digits)
    cleaned = email.lower().translate(cleanup)
    cod = nb.predict(vectorizer.transform([cleaned]))
    label = encoder.inverse_transform(cod)[0]
    # Fall back to the raw label instead of raising KeyError when the data
    # uses a label that is not listed in category_names.
    return category_names.get(label, label)

# Try the Naive Bayes classifier on a few hand-written e-mails and print the
# predicted category for each one, in order.
sample_emails = [
    'Unfortunately we will not be moving forward',
    'I found some job listings you may be interested in',
    'We were very fortunate to have a strong group of applicants to consider for this role and have recently filled this position. Unfortunately, because this role is no longer available, we will not be moving forward with your application.',
]
for sample in sample_emails:
    print(predict_reject(sample))
#print(predict_reject(''))


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# Build a one-vs-rest classifier around a default logistic regression.
base_estimator = LogisticRegression()
clf = OneVsRestClassifier(base_estimator)

# Train it on the training split.
clf.fit(x_train, y_train)

# Report the accuracy measured on the held-out test split.
test_accuracy = clf.score(x_test, y_test)
print(f'Accuracy: {test_accuracy}')

In [None]:
# Predict the held-out e-mails with the logistic-regression classifier.
x_test_clv_pred = clf.predict(x_test)

# The matrix must be printed explicitly: it is not the last expression of the
# cell, so its value would otherwise be discarded without ever being shown.
print(confusion_matrix(y_test, x_test_clv_pred))

# Per-class precision, recall and F1, labelled with the original Status names.
print(classification_report(y_test, x_test_clv_pred, target_names=encoder.classes_))

 - Inspiration and code samples from https://towardsdatascience.com/i-built-a-reject-not-reject-email-classifier-for-my-job-applications-844a3b6cd67e