# Case Study - Email Spam Classification using Naive Bayes

In [1]:
# Import necessary libraries from sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Sample Dataset and Levels

In [2]:
# Sample dataset
emails = [
    "Congratulations, you won a prize! Claim your free gift now",
    "Your invoice for the purchase is attached",
    "Get rich quick with this limited offer",
    "Important update regarding your account"
]

labels = [1, 0, 1, 0]  # 1 = spam, 0 = not spam


In [18]:
# Define sample email dataset
emails = [
    "Congratulations, you won a prize! Claim your free gift now",
    "Your invoice for the purchase is attached",
    "Get rich quick with this limited offer",
    "Important update regarding your account",
    "You have been selected for an exclusive lottery prize!",
    "Your subscription has been confirmed successfully",
    "Win a brand new car! Click the link to claim now",
    "Your PayPal account has been compromised, take immediate action"
]

labels = [1, 0, 1, 0, 1, 0, 1, 0]  # 1 = Spam, 0 = Not Spam

# A sample dataset might look like this:

| **Email ID** | **offer** | **winner** | **free** | **money** | **spam** |
|--------------|-----------|------------|----------|-----------|----------|
| 1            | 1         | 0          | 1        | 0         | 1        |
| 2            | 0         | 0          | 0        | 1         | 0        |
| 3            | 1         | 1          | 1        | 0         | 1        |
| 4            | 0         | 0          | 0        | 0         | 0        |



In [19]:
# Convert emails into feature vectors
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(emails)

In [20]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

In [21]:
# Train Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train, y_train)

In [22]:
# Make Predictions
y_pred = model.predict(X_test)

In [23]:
# 6. Evaluate the model's performance using accuracy, confusion matrix, and classification report
accuracy = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)



In [24]:
# 7. Display the results
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("\nConfusion Matrix:")
print(cm)
print("\nClassification Report:")
print(class_report)

Model Accuracy: 100.00%

Confusion Matrix:
[[2]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [25]:
# Function to predict a new email
def predict_email(email_text):
    email_vectorized = vectorizer.transform([email_text])  # Transform input text
    prediction = model.predict(email_vectorized)[0]  # Predict (0 = Not Spam, 1 = Spam)
    return "Spam" if prediction == 1 else "Not Spam"

In [26]:
# Example: Predicting a new email
new_email = "Claim your free lottery prize now!"
print(f"\nNew Email: '{new_email}' is classified as: {predict_email(new_email)}")


New Email: 'Claim your free lottery prize now!' is classified as: Spam


In [27]:
# Function to classify multiple emails
def predict_multiple_emails(email_list):
    email_vectorized = vectorizer.transform(email_list)  # Transform input texts
    predictions = model.predict(email_vectorized)
    return ["Spam" if p == 1 else "Not Spam" for p in predictions]

In [28]:
# Multiple email predictions
new_emails = [
    "Congratulations, you have won a free iPhone!",
    "Your bank account has been compromised, reset your password now",
    "Meeting is scheduled for tomorrow at 10 AM",
    "Get 50% off on your next purchase, limited time offer!"
]
predictions = predict_multiple_emails(new_emails)

print("\n🔍 Batch Email Predictions:")
for email, pred in zip(new_emails, predictions):
    print(f"📩 '{email}' ➡ {pred}")


🔍 Batch Email Predictions:
📩 'Congratulations, you have won a free iPhone!' ➡ Spam
📩 'Your bank account has been compromised, reset your password now' ➡ Not Spam
📩 'Meeting is scheduled for tomorrow at 10 AM' ➡ Spam
📩 'Get 50% off on your next purchase, limited time offer!' ➡ Spam
