# 📘 Spam Email Classification (Feature-Based Dataset)
Using Multinomial Naive Bayes in scikit-learn

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.naive_bayes import MultinomialNB

## Load Dataset

In [None]:
# Read the email dataset from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='emails.csv')

# Preview the first five rows (the default for head()) as a quick sanity check.
df.head(n=5)

## Data Preparation

In [None]:
# Remove the 'Email No.' column (presumably a row identifier rather than a
# feature -- confirm against the dataset description).
# Reassigning with errors='ignore' instead of using inplace=True keeps this
# cell safe to re-run: a second execution no longer raises KeyError because
# the column has already been dropped.
df = df.drop(columns=['Email No.'], errors='ignore')

# Every remaining column except the target is used as a model input.
X = df.drop(columns=['Prediction'])
# 'Prediction' is the target column the classifier learns to predict.
y = df['Prediction']

print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

## Train-Test Split

In [None]:
# Hold out 20% of the rows for evaluation.
# stratify=y keeps the class proportions of the target the same in the train
# and test partitions, so the reported metrics are not skewed by an unlucky
# draw when the classes are imbalanced.
# random_state pins the shuffle so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

## Model Training

In [None]:
# Multinomial Naive Bayes with scikit-learn's default hyperparameters.
model = MultinomialNB()

# Fit on the training partition only; the test partition stays unseen.
model.fit(X=X_train, y=y_train)

## Predictions

In [None]:
# Predict a class label for every row of the held-out test partition.
y_pred = model.predict(X=X_test)

## Model Evaluation

In [None]:
# Share of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

print("\nClassification Report:\n")
# Text summary of per-class precision, recall, F1-score and support.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

## Confusion Matrix

In [None]:
# Count matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Draw the counts as an annotated heatmap; fmt='d' renders them as integers.
ax = sns.heatmap(cm, annot=True, fmt='d')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

## Conclusion
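This project uses a feature-based email dataset in which word frequencies have already been converted into numerical form.
A Multinomial Naive Bayes classifier was therefore applied directly to these features, without a separate text-vectorization step.
The model achieved good accuracy in detecting spam emails on the held-out 20% test set (see the accuracy score, classification report, and confusion matrix above).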
