In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout

# Fetch the SMS dataset; the steps below use its 'type' and 'text' columns.
dataset_url = 'https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/sms_spam.csv'
data = pd.read_csv(dataset_url)

# Swap the string labels for integers (ham -> 0, spam -> 1).
le = LabelEncoder()
data['type'] = le.fit_transform(data['type'])

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['text'],
    data['type'],
    test_size=0.2,
    random_state=42,
)

# Fit the tokenizer on the training messages only, then convert both splits
# to integer sequences with that same fitted tokenizer.
vocab_size = 5000
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Bring every sequence to the common length max_len so the batches are
# rectangular.
max_len = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Assemble the network: embedding -> LSTM -> one sigmoid output unit.
# The embedding's input_dim is tied to the tokenizer's vocabulary size.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=128, input_length=max_len),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# NOTE: the test split is also passed as validation data, so the per-epoch
# validation metrics and the final evaluation below use the same samples.
model.fit(
    X_train_pad,
    y_train,
    validation_data=(X_test_pad, y_test),
    batch_size=64,
    epochs=5,
)

# Report accuracy on the held-out split.
loss, accuracy = model.evaluate(X_test_pad, y_test)
print(f'Accuracy: {accuracy*100:.2f}%')

# Define the predict_message function
def predict_message(message):
    """Classify one message with the trained model.

    Uses the module-level ``tokenizer``, ``max_len`` and ``model``.

    Args:
        message: The message to classify.

    Returns:
        A two-element list ``[probability, label]``. ``label`` is ``"spam"``
        when the model output is greater than 0.5 and ``"ham"`` otherwise.
        ``probability`` is the model output itself for ``"spam"`` and one
        minus the model output for ``"ham"``.
    """
    # Encode the message the same way the training data was encoded.
    padded = pad_sequences(
        tokenizer.texts_to_sequences([message]),
        maxlen=max_len,
    )

    # The model yields one row with one value for the single input message.
    spam_score = float(model.predict(padded)[0][0])

    if spam_score > 0.5:
        return [spam_score, "spam"]
    return [1 - spam_score, "ham"]

# Example usage: classify two sample messages and print each result.
for example_message in (
    "Congratulations! You've won a free ticket to the Bahamas. Call now!",
    "Hey, are we still meeting for coffee at 4pm?",
):
    print(predict_message(example_message))


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5