In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
df = pd.read_csv(
    r"C:\Users\Priyanshu\Desktop\archive\archive\spam.csv",
    encoding="latin-1"
)

df = df[['v1', 'v2']]
df.columns = ['label', 'text']

print(df.head())
encoder = LabelEncoder()
df['label'] = encoder.fit_transform(df['label'])
vocab_size = 5000
max_len = 100

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(df['text'])

X = tokenizer.texts_to_sequences(df['text'])
X = pad_sequences(X, maxlen=max_len)

y = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_len),
    LSTM(128),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()
history = model.fit(
    X_train, y_train,
    epochs=5,
    batch_size=64,
    validation_split=0.2
)
loss, accuracy = model.evaluate(X_test, y_test)
print("Test Accuracy:", accuracy)
y_pred = (model.predict(X_test) > 0.5).astype(int)

print(classification_report(y_test, y_pred))
def predict_spam(email_text):
    seq = tokenizer.texts_to_sequences([email_text])
    padded = pad_sequences(seq, maxlen=max_len)
    prediction = model.predict(padded)[0][0]
    return "Spam" if prediction > 0.5 else "Not Spam"


print(predict_spam("Congratulations! You have won a free gift card"))
print(predict_spam("Meeting scheduled tomorrow at 10 AM"))


  label                                               text
0   ham  Go until jurong point, crazy.. Available only ...
1   ham                      Ok lar... Joking wif u oni...
2  spam  Free entry in 2 a wkly comp to win FA Cup fina...
3   ham  U dun say so early hor... U c already then say...
4   ham  Nah I don't think he goes to usf, he lives aro...




Epoch 1/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 106ms/step - accuracy: 0.8787 - loss: 0.3635 - val_accuracy: 0.9686 - val_loss: 0.0973
Epoch 2/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 108ms/step - accuracy: 0.9838 - loss: 0.0574 - val_accuracy: 0.9854 - val_loss: 0.0547
Epoch 3/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 104ms/step - accuracy: 0.9921 - loss: 0.0299 - val_accuracy: 0.9832 - val_loss: 0.0580
Epoch 4/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 111ms/step - accuracy: 0.9961 - loss: 0.0165 - val_accuracy: 0.9865 - val_loss: 0.0588
Epoch 5/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 103ms/step - accuracy: 0.9986 - loss: 0.0059 - val_accuracy: 0.9865 - val_loss: 0.0718
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.9918 - loss: 0.0448
Test Accuracy: 0.9883407950401306
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 