In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the tab-separated SMS corpus (no header row) and encode the label
# column numerically in the same step: spam -> 1, ham -> 0.
url = "https://raw.githubusercontent.com/justmarkham/pycon-2016-tutorial/master/data/sms.tsv"
data = (
    pd.read_csv(url, sep='\t', header=None, names=['label', 'message'])
    .assign(label=lambda frame: frame['label'].map({'spam': 1, 'ham': 0}))
)

# Hold out 20% of the messages for evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data['message'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: the vocabulary is learned from the training messages only,
# then the same fitted vectorizer is applied to the held-out messages.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Fit a multinomial Naive Bayes classifier (fit() returns the estimator itself)
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the held-out messages
y_pred = model.predict(X_test_tfidf)

# Summary metrics on the held-out split
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

for caption, score in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(caption, score)


Accuracy: 0.979372197309417
Precision: 0.9921875
Recall: 0.8523489932885906
F1 Score: 0.9169675090252708


In [8]:
# Classify a single new message as spam or ham
def classify_message(message, model, vectorizer):
    """Label one text message as "Spam" or "Ham".

    Args:
        message: The message to classify. It is wrapped in a one-element
            list before being handed to ``vectorizer.transform``.
        model: Object exposing ``predict(features)``; only the first
            element of its result is inspected.
        vectorizer: Object exposing ``transform(documents)`` whose output
            is passed straight to ``model.predict``.

    Returns:
        "Spam" when the first prediction equals 1, otherwise "Ham".
    """
    features = vectorizer.transform([message])  # single-document batch
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Ham"

# Example usage: run a prize-style message through the fitted vectorizer and model
new_message = "Congratulations! You've won a $1,000 Walmart gift card. Call now!"
result = classify_message(new_message, model=model, vectorizer=vectorizer)
print(f"The message is: {result}")


The message is: Spam


In [9]:
# Classify a single new message as spam or ham.
# NOTE(review): this repeats the definition from the previous cell; consider
# defining the helper once and only calling it here.
def classify_message(message, model, vectorizer):
    """Label one text message as "Spam" or "Ham".

    Args:
        message: The message to classify. It is wrapped in a one-element
            list before being handed to ``vectorizer.transform``.
        model: Object exposing ``predict(features)``; only the first
            element of its result is inspected.
        vectorizer: Object exposing ``transform(documents)`` whose output
            is passed straight to ``model.predict``.

    Returns:
        "Spam" when the first prediction equals 1, otherwise "Ham".
    """
    features = vectorizer.transform([message])  # single-document batch
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Ham"

# Example usage: a short conversational message
new_message = "hi you can pass this exam"
result = classify_message(new_message, model=model, vectorizer=vectorizer)
print(f"The message is: {result}")


The message is: Ham


In [10]:
# Classify a single new message as spam or ham.
# NOTE(review): this repeats the definition from the earlier cells; consider
# defining the helper once and only calling it here.
def classify_message(message, model, vectorizer):
    """Label one text message as "Spam" or "Ham".

    Args:
        message: The message to classify. It is wrapped in a one-element
            list before being handed to ``vectorizer.transform``.
        model: Object exposing ``predict(features)``; only the first
            element of its result is inspected.
        vectorizer: Object exposing ``transform(documents)`` whose output
            is passed straight to ``model.predict``.

    Returns:
        "Spam" when the first prediction equals 1, otherwise "Ham".
    """
    features = vectorizer.transform([message])  # single-document batch
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Spam"
    return "Ham"

# Example usage: an everyday scheduling question
new_message = "Hey, are we still meeting for lunch tomorrow?"
result = classify_message(new_message, model=model, vectorizer=vectorizer)
print(f"The message is: {result}")


The message is: Ham
