In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


import tensorflow as tf

import tensorflow_hub as hub
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, roc_curve, roc_auc_score, precision_score, recall_score, f1_score

In [None]:
# Load the spam text-message dataset from the notebook's input directory.
DATA_PATH = "../input/spam-text-message-classification/SPAM text message 20170820 - Data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the raw frame.
df.head(n=5)

In [None]:
# Count missing values per column (isnull is the pandas alias of isna).
df.isnull().sum()

In [None]:
# Dataset dimensions, shown as (rows, columns).
n_rows, n_cols = df.shape
(n_rows, n_cols)

In [None]:
# Share of each label in the dataset, expressed as a percentage.
df['Category'].value_counts(normalize=True).mul(100)

In [None]:
# Encode the label column as integers: 1 for 'spam', 0 for anything else.
# The dtype guard makes the cell idempotent: once the column is numeric a
# second run is a no-op, whereas re-applying the string comparison to the
# already-encoded values would silently turn every label into 0.
if not pd.api.types.is_numeric_dtype(df['Category']):
    df['Category'] = (df['Category'] == 'spam').astype('int64')

In [None]:
# Preview the first five rows again to check the current 'Category' values.
df.head(n=5)

In [None]:
# Features: the message column as a plain Python list.
X = df['Message'].tolist()
# Targets: the 'Category' column as a NumPy array.
y = np.array(df['Category'].tolist())

In [None]:
# TF-Hub handle of the Universal Sentence Encoder (version 4) module.
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4"
# Fetch the module and load it as a callable object.
model = hub.load(handle=module_url)

In [None]:
def embed(input):
    """Run `input` through the loaded TF-Hub module and return its output.

    `input` is forwarded unchanged to the module-level `model`; this notebook
    calls it with the list of message strings.
    """
    embeddings = model(input)
    return embeddings

In [None]:
# Embed every message in a single call, then convert the result to NumPy.
embedded_messages = embed(X)
message_embeddings = embedded_messages.numpy()

In [None]:
# Hold out 25% of the embedded messages for evaluation.
# stratify=y keeps the spam/ham ratio identical in both partitions, so the
# held-out metrics are not skewed by an unlucky draw if the classes are
# imbalanced. random_state pins the shuffle for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    message_embeddings,
    y,
    test_size=0.25,
    random_state=42,
    shuffle=True,
    stratify=y,
)

In [None]:
def create_model(input_dim=512, hidden_units=(32, 16), dropout_rate=0.2):
    """Build the (uncompiled) feed-forward binary classifier.

    The network is a stack of Dense(relu) + Dropout pairs followed by a single
    sigmoid unit. With the default arguments it is the same architecture as
    before: 512 inputs -> Dense(32) -> Dropout(0.2) -> Dense(16) ->
    Dropout(0.2) -> Dense(1, sigmoid).

    Args:
        input_dim: Width of each input vector. The default of 512 is the
            size this notebook has always used for the sentence embeddings.
        hidden_units: Sizes of the hidden Dense layers, in order.
        dropout_rate: Dropout rate applied after every hidden layer.

    Returns:
        An uncompiled ``Model`` mapping ``(batch, input_dim)`` inputs to a
        ``(batch, 1)`` sigmoid output.
    """
    # Reset global Keras state so that re-running the cell starts from a
    # clean session instead of piling up layers from earlier builds.
    tf.keras.backend.clear_session()

    # `shape` is documented as a tuple; a bare int is not accepted by every
    # Keras version, so always pass the 1-tuple form.
    inp = Input(shape=(input_dim,))

    features = inp
    for units in hidden_units:
        features = Dense(units, activation='relu')(features)
        features = Dropout(dropout_rate)(features)

    opt = Dense(1, activation='sigmoid')(features)

    cmodel = Model(inputs=[inp], outputs=[opt])

    return cmodel

In [None]:
# Instantiate the classifier defined by create_model() and print its
# layer/parameter summary.
cmodel = create_model()
cmodel.summary()

In [None]:
# Halve the learning rate when validation accuracy stops improving.
# The monitored name must match the metric key Keras actually logs: the model
# is compiled with metrics=['accuracy'], so the validation key is
# 'val_accuracy'. With the old 'val_acc' the metric was never found and the
# callback never reduced the learning rate.
lr_reduction = ReduceLROnPlateau(monitor='val_accuracy',
                                 patience=3,
                                 verbose=1,
                                 factor=0.5,
                                 min_lr=0.00001)

# Stop training once validation loss has not improved for 5 epochs.
es = EarlyStopping(monitor='val_loss',
                   min_delta=0,
                   patience=5,
                   verbose=0, mode='auto')

cmodel.compile(loss='binary_crossentropy',
               optimizer='adamax',
               metrics=['accuracy'])

# NOTE(review): the test split doubles as the validation set here, so early
# stopping and LR scheduling are tuned on the same data that is later used
# for the final report. Consider carving a separate validation split out of
# the training data.
history = cmodel.fit(X_train,
                     y_train,
                     validation_data=(X_test, y_test),
                     batch_size=32,
                     epochs=10,
                     callbacks=[lr_reduction, es],
                     shuffle=True)

In [None]:
# Plot training vs. validation loss (top) and accuracy (bottom) per epoch.
# Early stopping can end training before the configured epoch count, so the
# x axis is derived from the recorded history instead of being hard-coded.
# The previous np.arange(1, 1, 1) was empty and wiped every x tick from the
# loss plot.
epochs_run = np.arange(1, len(history.history['loss']) + 1)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 12))

ax1.plot(epochs_run, history.history['loss'], color='b', label="Training loss")
ax1.plot(epochs_run, history.history['val_loss'], color='r', label="validation loss")
ax1.set_xticks(epochs_run)
ax1.set_yticks(np.arange(0, 1, 0.1))
ax1.set_xlabel("Epoch")
ax1.set_ylabel("Loss")
# plt.legend() only decorates the most recently used axes, so each subplot
# gets its own legend explicitly.
ax1.legend(loc='best', shadow=True)

ax2.plot(epochs_run, history.history['accuracy'], color='b', label="Training accuracy")
ax2.plot(epochs_run, history.history['val_accuracy'], color='r',label="Validation accuracy")
ax2.set_xticks(epochs_run)
ax2.set_xlabel("Epoch")
ax2.set_ylabel("Accuracy")

legend = ax2.legend(loc='best', shadow=True)
plt.tight_layout()
plt.show()

In [None]:
# Model outputs (sigmoid activations) for every held-out sample.
ypred = cmodel.predict(x=X_test)

In [None]:
# The model has a single output unit, so column 0 holds the predicted score.
spam_scores = ypred[:, 0]

# Area under the ROC curve on the held-out split.
auc = roc_auc_score(y_test, spam_scores)
print('AUC: %.3f' % auc)

# ROC operating points; `thresholds` drives the threshold search that follows.
fpr, tpr, thresholds = roc_curve(y_test, spam_scores)

In [None]:
# Search the ROC thresholds for the cut-off that maximises F1 on the positive
# (label 1) class.
#
# The metrics use scikit-learn's default binary averaging. The previous
# average='micro' pools both classes, which for a binary target makes
# precision, recall and F1 all equal to plain accuracy, so the search was
# really maximising accuracy and reporting it under three different names.
#
# NOTE(review): the threshold is tuned on the same split that is used for the
# final report, which makes that report optimistic. Consider tuning on a
# separate validation split.
scores = ypred[:, 0]

threshold_dict = {}
for val in thresholds:
    # Single comparison instead of two in-place masked assignments.
    pred = (scores >= val).astype(int)

    # zero_division=0: the highest ROC threshold predicts no positives at
    # all, where precision is undefined; score it as 0 without a warning.
    precision = precision_score(y_test, pred, zero_division=0)
    recall = recall_score(y_test, pred, zero_division=0)
    f1 = f1_score(y_test, pred, zero_division=0)

    # Keyed by F1: when two thresholds tie, the one visited later wins.
    threshold_dict[f1] = {'threshold': val, 'precision': precision, 'recall': recall}

max_f1 = max(threshold_dict.keys())
final_dict = threshold_dict[max_f1]
print(final_dict)

In [None]:
# Binarise the scores at the selected threshold and print the per-class report.
val = final_dict['threshold']

# Work on a copy so the raw model outputs in `ypred` stay untouched.
pred = np.array(ypred[:, 0])
# Same two-step order as before: first mark everything at or above the
# threshold as 1, then everything still below it as 0.
np.putmask(pred, pred >= val, 1)
np.putmask(pred, pred < val, 0)

print(classification_report(y_test, pred))