In [65]:
import pandas as pd
import tensorflow as tf
import pickle
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [67]:
df = pd.read_csv("spam.csv", encoding='latin-1')[['v1', 'v2']]
df.columns = ['label', 'message']


In [69]:
# Convert labels to binary (ham=0, spam=1)
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

texts = df['message'].astype(str).tolist()
labels = df['label'].values

In [71]:
# 2. Tokenize the text
max_words = 5000
max_len = 100

tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)

sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len)

In [73]:
#3. Train-test split
x_train, x_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=42)


In [75]:
# 4. Build LSTM model
model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=64, input_length=max_len))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])




In [77]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Compute class weights from y_train
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weight_dict = dict(enumerate(class_weights))

# Train model with class weights
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2, class_weight=class_weight_dict)


Epoch 1/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 90ms/step - accuracy: 0.8130 - loss: 0.5405 - val_accuracy: 0.9619 - val_loss: 0.1746
Epoch 2/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 75ms/step - accuracy: 0.9761 - loss: 0.1403 - val_accuracy: 0.9832 - val_loss: 0.0689
Epoch 3/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - accuracy: 0.9951 - loss: 0.0394 - val_accuracy: 0.9809 - val_loss: 0.0671
Epoch 4/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 93ms/step - accuracy: 0.9944 - loss: 0.0290 - val_accuracy: 0.9787 - val_loss: 0.0692
Epoch 5/5
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 116ms/step - accuracy: 0.9982 - loss: 0.0106 - val_accuracy: 0.9865 - val_loss: 0.0481


<keras.src.callbacks.history.History at 0x270d9e743b0>

In [79]:
# 6. Evaluate
loss, acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {acc:.2f}")


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.9821 - loss: 0.0514
Test Accuracy: 0.98


In [81]:
# 7. Predict spam/ham from new SMS
def encode_sms(text):
    seq = tokenizer.texts_to_sequences([text])
    pad = pad_sequences(seq, maxlen=max_len)
    return pad

while True:
    sms = input("\nEnter SMS (or type exit to quit): ")
    if sms.lower() == 'exit':
        break
    pred = model.predict(encode_sms(sms))[0][0]
    print("Spam" if pred >= 0.4 else "Ham", f"({pred:.2f})")



Enter SMS (or type exit to quit):  Congratulations! You’ve won a $1000 gift card. Click here to claim now. URGENT: Your mobile number has won $500,000 in our draw. Reply WIN to claim. FREE entry into our weekly prize draw. Text WIN to 80088. Get cheap loans now with no credit check. Apply at www.easycash.com You’ve been selected for a special offer. Visit bit.ly/spamdeal Earn money from home in your spare time. Click here to start today! Exclusive offer just for you. Hurry before it expires! You have 1 unread voicemail. Click to listen: spamlink.com This is not a scam! You really won! Claim here: fakeoffer.net Lowest insurance rates guaranteed. Call now!


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 437ms/step
Spam (1.00)



Enter SMS (or type exit to quit):  exit


In [83]:
pickle_filename='spam_model.pkl'
with open(pickle_filename,'wb')as file:
     pickle.dump(model, file)

In [85]:
print("Model save to",pickle_filename)

Model save to spam_model.pkl


In [87]:
model.save("C:/Users/Dell/Download/spam_model.h5")




In [59]:
import os
print("Model saved at:", os.path.abspath("spam_model.h5"))


Model saved at: C:\Users\Dell\spam_model.h5


In [89]:
model.save("C:/Users/Dell/Downloads/spam_model.h5")  # corrected 'Download' to 'Downloads'
print("Model saved at: C:/Users/Dell/Downloads/spam_model.h5")




Model saved at: C:/Users/Dell/Downloads/spam_model.h5


In [93]:
import pickle

# Save tokenizer
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)


In [105]:
import pickle

with open("C:/Users/Dell/Downloads/tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)


In [107]:
import os
print("Saving tokenizer to:", os.getcwd())


Saving tokenizer to: C:\Users\Dell


In [95]:
print("Model save to",pickle_filename)

Model save to spam_model.pkl
