
# Case Study: Naive Bayes Classifier (NBC) for SMS Spam Detection

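This Jupyter notebook provides a step-by-step example of using a Multinomial Naive Bayes classifier to detect spam messages in the `SMSSpamCollection` dataset: loading the tab-separated file, encoding the labels, converting the messages to word counts, training the model, and evaluating it on a held-out test set.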


In [2]:
import csv

import pandas as pd

# Load the dataset: a header-less, tab-separated file with one
# "<label>\t<message>" record per line.
file_path = 'SMSSpamCollection'
data = pd.read_csv(
    file_path,
    sep='\t',
    header=None,
    names=['Label', 'Message'],
    # Messages are free text and may contain unbalanced double quotes. With the
    # default quoting, the parser treats such a quote as the start of a quoted
    # field and can merge several records into a single row, silently dropping
    # messages. QUOTE_NONE makes every quote character plain text.
    quoting=csv.QUOTE_NONE,
)

# Display the first few rows of the dataset
data.head()


ModuleNotFoundError: No module named 'pandas'

In [None]:

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

# Convert labels to binary values (ham -> 0, spam -> 1).
# The 0 -> 0 and 1 -> 1 entries make this cell safe to re-run: a plain
# {'ham': 0, 'spam': 1} mapping would turn the already-encoded column into NaN
# on a second execution.
label_to_id = {'ham': 0, 'spam': 1, 0: 0, 1: 1}
encoded_labels = data['Label'].map(label_to_id)

# Fail loudly on anything that is not a known label instead of passing NaN
# targets on to the classifier.
unmapped = encoded_labels.isna()
if unmapped.any():
    bad_values = sorted(data.loc[unmapped, 'Label'].astype(str).unique())
    raise ValueError(
        f"Unexpected values in 'Label' column: {bad_values}. "
        "Reload the dataset and re-run this cell."
    )
data['Label'] = encoded_labels

# Split the data into features and target
X = data['Message']
y = data['Label']

# Split the data into training and testing sets (80% / 20%, fixed seed so the
# split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data to numeric using CountVectorizer.
# The vocabulary is learned from the training messages only; the test messages
# are transformed with that same vocabulary.
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)


In [None]:

from sklearn.naive_bayes import MultinomialNB

# Build the multinomial Naive Bayes model and fit it on the vectorized
# training messages in one step (fit() returns the estimator itself).
nb_model = MultinomialNB().fit(X_train_vec, y_train)

# Show the fitted estimator as the cell output
nb_model


In [None]:

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Predict labels for the held-out test messages
y_pred = nb_model.predict(X_test_vec)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Emit all three summaries, one per line, in a single print call
print(
    f'Accuracy: {accuracy}',
    f'Confusion Matrix:\n{conf_matrix}',
    f'Classification Report:\n{class_report}',
    sep='\n',
)
