In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline

In [12]:
# Read spam.csv (path is relative to the notebook's working directory),
# decoding the bytes as ISO-8859-1 (Latin-1).
data = pd.read_csv(
    filepath_or_buffer='spam.csv',
    encoding='ISO-8859-1',
)

In [13]:
# Show the column labels of the freshly loaded frame before any renaming.
print(data.columns)

Index(['v1', 'v2', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4'], dtype='object')


In [14]:
# Give the raw 'v1' column a descriptive name; it is compared against 'spam'
# later on, so it serves as the class label.
data = data.rename({'v1': 'label'}, axis='columns')

In [15]:
data = data.rename(columns={'v1': 'message'})

In [16]:
data['label'] = (data['label'] == 'spam').astype(int)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(data['v2'], data['label'], test_size=0.2, random_state=42)


In [19]:
# Two-step text classifier: TF-IDF features ('tfidf') feed a multinomial
# Naive Bayes model ('clf'). Both steps use their default settings.
text_clf = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer()),
        ('clf', MultinomialNB()),
    ]
)


In [20]:
# Fit the whole pipeline on the training split. Left as the cell's last
# expression so the fitted estimator's repr is displayed.
text_clf.fit(X=X_train, y=y_train)

Pipeline(steps=[('tfidf', TfidfVectorizer()), ('clf', MultinomialNB())])

In [21]:
# Score the held-out split: per-class precision/recall/F1 first, then the raw
# confusion matrix (rows are true labels, columns are predicted labels).
y_pred = text_clf.predict(X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))
print(
    "\nConfusion Matrix:",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
    sep="\n",
)

              precision    recall  f1-score   support

           0       0.96      1.00      0.98       965
           1       1.00      0.72      0.84       150

    accuracy                           0.96      1115
   macro avg       0.98      0.86      0.91      1115
weighted avg       0.96      0.96      0.96      1115


Confusion Matrix:
[[965   0]
 [ 42 108]]


In [22]:
# Smoke-test the fitted pipeline on two hand-written messages.
new_messages = [
    "Congratulations! You've won a free iPhone. Click here to claim your prize!",
    "Hey, what time should we meet for dinner tonight?",
]
predictions = text_clf.predict(new_messages)

# Label 1 is reported as 'Spam'; anything else as 'Not Spam'.
for message, prediction in zip(new_messages, predictions):
    print(
        f"Message: {message}",
        f"Prediction: {'Spam' if prediction == 1 else 'Not Spam'}\n",
        sep="\n",
    )

Message: Congratulations! You've won a free iPhone. Click here to claim your prize!
Prediction: Spam

Message: Hey, what time should we meet for dinner tonight?
Prediction: Not Spam

