In [1]:
# Step 1: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
import pickle

# Step 2: Load a small dataset
# Tiny in-memory toy corpus: three spam and three ham messages.
data = {
    "text": [
        "Free prize claim now",
        "Hi how are you",
        "Win cash now",
        "Call me tomorrow",
        "Limited offer free coupon",
        "Let us meet today",
    ],
    "label": ["spam", "ham", "spam", "ham", "spam", "ham"],
}

df = pd.DataFrame(data)

# Step 3: Convert text to lowercase
# CountVectorizer also lowercases by default; this keeps df["text"] itself
# normalized for anything that inspects the frame directly.
df["text"] = df["text"].str.lower()
y = df["label"]

# Step 4: Split dataset into training and testing sets
# Split the raw text BEFORE vectorizing so the vocabulary is learned from the
# training messages only; fitting on the full corpus leaks test-set words
# into the features and inflates the reported accuracy.
text_train, text_test, y_train, y_test = train_test_split(
    df["text"], y, test_size=0.3, random_state=42
)

# Step 5: Convert text to numerical features (Bag of Words)
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary + encode
X_test = vectorizer.transform(text_test)        # encode only; unseen words are dropped
# Keep `X` bound to the feature matrix of the whole dataset (now encoded with
# the training vocabulary) so code that refers to it keeps working.
X = vectorizer.transform(df["text"])

# Step 6: Train Multinomial Naive Bayes classifier
model = MultinomialNB()
model.fit(X_train, y_train)

# Step 7: Predict class labels
y_pred = model.predict(X_test)

# Step 8: Evaluate model accuracy
# NOTE: with only two held-out messages this number is a smoke test, not a
# meaningful estimate of generalization.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Step 9: Display actual and predicted labels
print("Actual Labels:", list(y_test))
print("Predicted Labels:", list(y_pred))

# Step 10: Save the trained model
# The model file keeps its original name and content (the classifier only).
with open("spam_classifier_model.pkl", "wb") as file:
    pickle.dump(model, file)

# The classifier's feature columns are defined by the fitted vectorizer, so
# it is persisted too; without it the saved model cannot score new text.
with open("spam_vectorizer.pkl", "wb") as file:
    pickle.dump(vectorizer, file)

print("Model saved successfully")


Accuracy: 1.0
Actual Labels: ['spam', 'ham']
Predicted Labels: [np.str_('spam'), np.str_('ham')]
Model saved successfully
