In [None]:
# Spam Detection Model using Scikit-learn

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# -------------------------
# 1. Dataset
# -------------------------
# Every sample is written as a (text, label) pair so the message/label
# alignment can be checked at a glance. The corpus holds 20 messages,
# 10 labelled "spam" and 10 labelled "ham".
labelled_messages = [
    ("Congratulations! You won a free lottery. Claim now!", "spam"),
    ("Hi John, are we meeting tomorrow?", "ham"),
    ("Exclusive deal just for you. Click to win prizes", "spam"),
    ("Don't forget to submit the assignment", "ham"),
    ("Win cash now!!! Limited offer", "spam"),
    ("Let's catch up later", "ham"),
    ("You have been selected for a cash reward", "spam"),
    ("Reminder: Your appointment is scheduled at 5 PM", "ham"),
    ("Claim your free vacation now", "spam"),
    ("Meeting postponed to next week", "ham"),
    ("Urgent: Your account has been compromised", "spam"),
    ("Lunch at 1 PM?", "ham"),
    ("Limited time offer! Buy now!", "spam"),
    ("Can you send me the notes?", "ham"),
    ("Earn money instantly by clicking this link", "spam"),
    ("Let's go for a walk today", "ham"),
    ("Your invoice is attached", "ham"),
    ("Free entry in a contest! Participate now", "spam"),
    ("Are you joining the call?", "ham"),
    ("Congratulations, you are our lucky winner", "spam"),
]

# Column-oriented view of the same samples, in the order listed above.
data = {
    "message": [text for text, _ in labelled_messages],
    "label": [tag for _, tag in labelled_messages],
}

df = pd.DataFrame(data)

# Features are the raw message texts; targets are the "spam"/"ham" labels.
X, y = df["message"], df["label"]

# -------------------------
# 2. Train-Test Split
# -------------------------
# Split the raw text BEFORE vectorizing. Fitting the vectorizer on the whole
# corpus lets the held-out messages shape the vocabulary (data leakage),
# which makes the accuracy measured on them optimistic.
# The sample count, test_size and random_state are unchanged, so the same
# rows end up in the train and test partitions as before.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# -------------------------
# 3. Text Vectorization
# -------------------------
# Learn the bag-of-words vocabulary from the training messages only, then
# reuse it unchanged for the test messages; words that never occurred in
# training are ignored by transform().
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Count matrix for the full corpus under the training vocabulary. Kept only
# so that any code still reading X_vec keeps working; do not train on it.
X_vec = vectorizer.transform(X)

# -------------------------
# 4. Train a Naive Bayes Classifier
# -------------------------
# Multinomial Naive Bayes over the word-count features. fit() returns the
# estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train, y_train)

print("Model trained successfully.\n")

# -------------------------
# 5. Evaluate Model
# -------------------------
# Predict labels for the held-out split and report the fraction that match
# the true labels, formatted to two decimal places.
pred_test = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=pred_test)
print(f"Model Accuracy: {accuracy:.2f}\n")

# -------------------------
# 6. Make Predictions
# -------------------------
# Ad-hoc messages that are not part of the dataset above.
test_messages = [
    "You have won a free gift",
    "Please call me when you reach",
    "Limited time cash offer for you",
    "Don't forget our meeting tomorrow",
]

# transform() only (no re-fit): the new messages must be encoded with the
# vocabulary the model was trained on.
test_vec = vectorizer.transform(test_messages)
predictions = model.predict(test_vec)

# One output line per message, pairing each text with its predicted label.
print("Predictions:")
for pred, msg in zip(predictions, test_messages):
    print(f"Message: {msg} --> Prediction: {pred}")


Model trained with expanded dataset.
Message: You have won a free gift --> Prediction: spam
Message: Please call me when you reach --> Prediction: ham
Message: Limited time cash offer for you --> Prediction: spam
Message: Don't forget our meeting tomorrow --> Prediction: ham
