In [1]:
# Import Libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


# Load Dataset
data = pd.read_csv("spam.csv", encoding="latin-1")

print("Dataset Loaded Successfully")
data.head()


# Clean Dataset (Remove Extra Columns)
data = data[['v1','v2']]
data.columns = ['label','message']

print("\nAfter Cleaning:")
data.head()


print("\nLabels Converted")
data['label'].value_counts()


#  Text to Numbers (Vectorization)
vectorizer = TfidfVectorizer(stop_words='english')

X = vectorizer.fit_transform(data['message'])
y = data['label']

print("\nText Converted into Numbers")


#  Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

print("\nData Split Done")


# Train Model (Naive Bayes)
model = MultinomialNB()

model.fit(X_train, y_train)

print("\nModel Trained Successfully")


#  Prediction
y_pred = model.predict(X_test)

print("\nPrediction Done")


#  Accuracy Check
print("\nModel Accuracy:")
print("Accuracy =", accuracy_score(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


# Test with New Email (Demo)
print("\n--- Check New Email ---")

email = ["Congratulations! You have won free prize money"]

email_vec = vectorizer.transform(email)

result = model.predict(email_vec)

if result[0] == 1:
    print("Result: Spam Email ❌")
else:
    print("Result: Not Spam ✅")

Dataset Loaded Successfully

After Cleaning:

Labels Converted

Text Converted into Numbers

Data Split Done

Model Trained Successfully

Prediction Done

Model Accuracy:
Accuracy = 0.968609865470852

Classification Report:
              precision    recall  f1-score   support

         ham       0.96      1.00      0.98       965
        spam       1.00      0.77      0.87       150

    accuracy                           0.97      1115
   macro avg       0.98      0.88      0.93      1115
weighted avg       0.97      0.97      0.97      1115


--- Check New Email ---
Result: Not Spam ✅
