In [47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import warnings
warnings.filterwarnings('ignore')
import tensorflow_hub as hub
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

In [48]:
df=pd.read_csv(r'C:\Users\Pakistan\Downloads\spam.csv', encoding='latin-1')
df.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [49]:
df = df.drop(['Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], axis=1)
df = df.rename(columns={'v1': 'label', 'v2': 'Text'})
df['label_enc'] = df['label'].map({'ham': 0, 'spam': 1})
df.head()

Unnamed: 0,label,Text,label_enc
0,ham,"Go until jurong point, crazy.. Available only ...",0
1,ham,Ok lar... Joking wif u oni...,0
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,1
3,ham,U dun say so early hor... U c already then say...,0
4,ham,"Nah I don't think he goes to usf, he lives aro...",0


In [50]:
X_train, X_test, y_train, y_test = train_test_split(df['Text'], df['label_enc'], test_size=0.2, random_state=42)

X_train_np = X_train.to_numpy()
X_test_np = X_test.to_numpy()
y_train_np = y_train.to_numpy()
y_test_np = y_test.to_numpy()

In [51]:
avg_words_len = round(sum([len(i.split())
                      for i in df['Text']]) / len(df['Text']))
total_words_length = len(set("". join(df['Text']).split()))

print(f"Data Loaded. Training samples: {len(X_train_np)}")
print(f"Average words per message: {avg_words_len}")
print(f"Total words per message: {total_words_length}")

Data Loaded. Training samples: 4457
Average words per message: 16
Total words per message: 18392


In [52]:
def compile_and_fit(model, epochs=5):
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'] )
    history = model.fit(X_train_np, y_train_np, epochs =epochs, validation_dat=(X_test_np, y_test_np)
    )
    return history
def get_metrics(model, X, y):
    y_pred =np.round(model.predict(X))
    return{
        'accuracy': accuracy_score(y, y_pred),
        'precision': precision_score(y, y_pred),
        'recall': recall_score(y, y_pred),
        'f1-score': f1_score(y, y_pred)
    }

In [56]:
from tensorflow.keras.layers import TextVectorization
text_vec = TextVectorization(
    max_tokens=total_words_length,
    standardize='lower_and_strip_punctuation',
    output_mode='int',
    output_sequence_length=avg_words_len
)
text_vec.adapt(X_train_np)

In [54]:
def compile_and_fit(model, epochs=5):
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(
        X_train_np, y_train_np,
        epochs=epochs,
        validation_data=(X_test_np, y_test_np)
    )
    return history

In [57]:
input_layer1 = layers.Input(shape=(1,), dtype=tf.string)
X1 = text_vec(input_layer1)
X1 = layers.Embedding(input_dim=total_words_length, output_dim=128)(X1)
X1 = layers.GlobalAveragePooling1D()(X1)
X1 = layers.Dense(32, activation='relu')(X1)
output_layer1 = layers.Dense(1, activation='sigmoid')(X1)
model_1 = keras.Model(input_layer1, output_layer1, name="Dense_Model")
history_1 = compile_and_fit(model_1)

Epoch 1/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 23ms/step - accuracy: 0.9112 - loss: 0.2711 - val_accuracy: 0.9614 - val_loss: 0.1344
Epoch 2/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.9787 - loss: 0.0826 - val_accuracy: 0.9767 - val_loss: 0.0766
Epoch 3/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.9904 - loss: 0.0391 - val_accuracy: 0.9776 - val_loss: 0.0691
Epoch 4/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - accuracy: 0.9948 - loss: 0.0230 - val_accuracy: 0.9776 - val_loss: 0.0665
Epoch 5/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.9975 - loss: 0.0151 - val_accuracy: 0.9785 - val_loss: 0.0622


In [58]:
input_layer2 = layers.Input(shape=(1,), dtype=tf.string)
X2 = text_vec(input_layer2)
X2 = layers.Embedding(input_dim=total_words_length, output_dim=128)(X2)
X2 = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(X2)
X2 = layers.Bidirectional(layers.LSTM(64))(X2)
output_layer2 = layers.Dense(1, activation='sigmoid')(X2)
model_2 = keras.Model(input_layer2, output_layer2, name="BiLSTM_Model")
history_2 = compile_and_fit(model_2)

Epoch 1/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 54ms/step - accuracy: 0.9446 - loss: 0.1674 - val_accuracy: 0.9749 - val_loss: 0.0863
Epoch 2/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 54ms/step - accuracy: 0.9899 - loss: 0.0377 - val_accuracy: 0.9803 - val_loss: 0.0704
Epoch 3/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 49ms/step - accuracy: 0.9969 - loss: 0.0149 - val_accuracy: 0.9803 - val_loss: 0.0808
Epoch 4/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 67ms/step - accuracy: 0.9993 - loss: 0.0037 - val_accuracy: 0.9794 - val_loss: 0.0804
Epoch 5/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 83ms/step - accuracy: 1.0000 - loss: 7.3487e-04 - val_accuracy: 0.9794 - val_loss: 0.1076


In [61]:
use_layer = hub.KerasLayer(
    "https://tfhub.dev/google/universal-sentence-encoder/4",
    trainable=False,
    input_shape=[],
    dtype=tf.string,
    name='USE'
)
input_layer3 = layers.Input(shape=[], dtype=tf.string)
embedding = layers.Lambda(lambda x: use_layer(x), output_shape=(512,))(input_layer3)
x3 = layers.Dense(64, activation='relu')(embedding)
x3 = layers.Dropout(0.2)(x3)
output_layer3 = layers.Dense(1, activation='sigmoid')(x3)
model_3 = keras.Model(input_layer3, output_layer3, name="USE_Model")
history_3 = compile_and_fit(model_3)

Epoch 1/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 20ms/step - accuracy: 0.9096 - loss: 0.3119 - val_accuracy: 0.9695 - val_loss: 0.1220
Epoch 2/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9778 - loss: 0.0855 - val_accuracy: 0.9776 - val_loss: 0.0724
Epoch 3/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9841 - loss: 0.0580 - val_accuracy: 0.9821 - val_loss: 0.0600
Epoch 4/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.9859 - loss: 0.0478 - val_accuracy: 0.9830 - val_loss: 0.0551
Epoch 5/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.9886 - loss: 0.0409 - val_accuracy: 0.9830 - val_loss: 0.0537


In [None]:
results = {
    'Dense Embedding': get_metrics(model_1, X_test_np, y_test_np),
    'Bi-LSTM': get_metrics(model_2, X_test_np, y_test_np),
    'Transfer Learning (USE)': get_metrics(model_3, X_test_np, y_test_np)
}

results_df = pd.DataFrame(results).transpose()
print("Performance Table:")
print(results_df)

In [None]:
results_df.plot(kind='bar', figsize=(10, 6))
plt.title("Model Performance Metrics (Bar Chart)")
plt.ylabel("Score")
plt.ylim(0.8, 1.0)
plt.xticks(rotation=0)
plt.legend(loc='lower right')
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [None]:
plt.figure(figsize=(10, 6))

for model_name in results_df.index:
    plt.plot(
        results_df.columns,
        results_df.loc[model_name],
        marker='o',
        label=model_name,
        linewidth=2
    )
plt.title("Model Performance Trends (Line Graph)")
plt.ylabel("Score")
plt.xlabel("Metric")
plt.ylim(0.8, 1.0)
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend()
plt.show()