In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Sample dataset for spam detection
data = {
    'text': [
        'Free entry in 2 a weekly competition!',
        'Call now to claim your prize.',
        'Hey, are we meeting tomorrow?',
        'URGENT! You have won a lottery.',
        'Don’t forget to submit the report.',
        'Win a free ticket to Bahamas!'
    ],
    'label': ['spam', 'spam', 'ham', 'spam', 'ham', 'spam']
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Encode the labels
df['label'] = df['label'].map({'ham': 0, 'spam': 1})

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['label'], test_size=0.3, random_state=42)

# Convert text to feature vectors
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train the Naive Bayes Classifier
model = MultinomialNB()
model.fit(X_train_vec, y_train)

# Make predictions
y_pred = model.predict(X_test_vec)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Test with a custom message
test_message = ['Congratulations! You have won a free gift card.']
test_vec = vectorizer.transform(test_message)
print("Prediction for test message:", 'spam' if model.predict(test_vec)[0] else 'ham')


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Prediction for test message: spam
