In [1]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score


# Toy corpus: each entry pairs an email subject line with its class label.
labelled_emails = [
    ("Congratulations! You have won a free iPhone", "Spam"),
    ("Your electricity bill is due tomorrow", "Not Spam"),
    ("Lowest price on laptops, buy today", "Spam"),
    ("Schedule for next week’s team meeting", "Not Spam"),
    ("Get rich quick with this one trick", "Spam"),
    ("Can we reschedule our call?", "Not Spam"),
    ("Urgent: Update your banking information now", "Spam"),
    ("Happy birthday! Let’s celebrate soon", "Not Spam"),
    ("Exclusive offer: 70% discount on shoes", "Spam"),
    ("Reminder: Doctor’s appointment at 5 PM", "Not Spam"),
]

# Unzip the pairs into the column-oriented dict that pandas consumes.
data = {
    "Email": [text for text, label in labelled_emails],
    "Label": [label for text, label in labelled_emails],
}

# Tabulate the corpus and echo it so the inputs can be inspected.
df = pd.DataFrame(data)
print("Dataset:\n", df)

# Bag-of-words features: each email becomes a row of token counts.
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df["Email"])

# Numeric target: "Spam" is encoded as 1, "Not Spam" as 0.
label_codes = {"Not Spam": 0, "Spam": 1}
y = df["Label"].map(label_codes)

# Hold out 30% of the rows for testing; stratifying on y keeps the class
# balance in both the train and the test split. The seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.3,
)

# Fit a logistic-regression classifier with its default settings
# (fit() returns the estimator, so construction and training chain).
model = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out emails.
y_pred = model.predict(X_test)

# Confusion matrix of true vs. predicted labels (computed here, not printed).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Per-class precision / recall / F1. zero_division=0 reports undefined
# ratios as 0 instead of emitting a warning.
report = classification_report(y_true=y_test, y_pred=y_pred, zero_division=0)
print("\nClassification Report:\n", report)

# Overall fraction of test emails classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Dataset:
                                          Email     Label
0  Congratulations! You have won a free iPhone      Spam
1        Your electricity bill is due tomorrow  Not Spam
2           Lowest price on laptops, buy today      Spam
3        Schedule for next week’s team meeting  Not Spam
4           Get rich quick with this one trick      Spam
5                  Can we reschedule our call?  Not Spam
6  Urgent: Update your banking information now      Spam
7         Happy birthday! Let’s celebrate soon  Not Spam
8       Exclusive offer: 70% discount on shoes      Spam
9       Reminder: Doctor’s appointment at 5 PM  Not Spam

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.33      1.00      0.50         1

    accuracy                           0.33         3
   macro avg       0.17      0.50      0.25         3
weighted avg       0.11      0.33      0.17         3

Accuracy: 0.3333333333333333