In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [11]:
# Read emails.csv (resolved relative to the notebook's working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer='emails.csv')


In [12]:
# Remove the 'Email No.' column so it is not used as a model feature.
# errors='ignore' keeps this cell idempotent: `df` is reassigned from itself,
# so re-running the cell would otherwise raise KeyError because the column
# has already been removed by the previous execution.
df = df.drop(columns=['Email No.'], errors='ignore')

In [13]:
# Split the frame into the target column and the feature matrix
y = df['Prediction']  # labels: 0 (non-spam) and 1 (spam)
X = df.drop('Prediction', axis=1)  # everything else: the word-frequency columns

In [14]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class balance between train and test matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [15]:
# Fit a multinomial Naive Bayes model (default hyper-parameters) on the training split
nb_classifier = MultinomialNB()
nb_classifier.fit(X=X_train, y=y_train)


In [16]:
# Run the fitted classifier over the held-out test features
y_pred = nb_classifier.predict(X=X_test)


In [17]:
# Fraction of test samples whose predicted label matches the true label,
# reported as a percentage with two decimals
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 95.46%


In [18]:
# Compute both diagnostics first, then print them in order:
# the confusion matrix (rows = true class, columns = predicted class)
# followed by the per-class precision / recall / F1 report.
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

print("\nConfusion Matrix:")
print(conf_matrix)

print("\nClassification Report:")
print(class_report)


Confusion Matrix:
[[704  35]
 [ 12 284]]

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.95      0.97       739
           1       0.89      0.96      0.92       296

    accuracy                           0.95      1035
   macro avg       0.94      0.96      0.95      1035
weighted avg       0.96      0.95      0.96      1035



In [19]:
# Predict a single sample, reusing the first row of the held-out test set.
# The list indexer iloc[[0]] returns a one-row DataFrame, so the column names
# the classifier was fitted with are preserved. Converting the row to a bare
# NumPy array (.values.reshape(1, -1)) discards them, which makes scikit-learn
# warn that X does not have valid feature names.
new_email = X_test.iloc[[0]]
new_pred = nb_classifier.predict(new_email)

# predict() returns one label per input row; index 0 is the only row here
print(f"\nPrediction for the selected email: {'Spam' if new_pred[0] == 1 else 'Non-spam'}")



Prediction for the selected email: Non-spam


