In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix



In [12]:
# Load the dataset
# Replace 'spam.csv' with the path to the dataset file
data = pd.read_csv('email_data.csv', encoding='latin-1')

In [15]:
data['Category'] = data['Category'].map({'spam': 1, 'ham': 0})

In [17]:
vectorizer = TfidfVectorizer(max_features=3000)  # Limit features for performance
X = vectorizer.fit_transform(data['Message'])
y = data['Category']

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

In [20]:
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [21]:
def evaluate_model(y_test, y_pred, model_name):
    print(f"{model_name} Performance:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("\n")

In [22]:
evaluate_model(y_test, y_pred_knn, "K-Nearest Neighbors")

K-Nearest Neighbors Performance:
Accuracy: 0.9246636771300448
Classification Report:
               precision    recall  f1-score   support

           0       0.92      1.00      0.96       966
           1       1.00      0.44      0.61       149

    accuracy                           0.92      1115
   macro avg       0.96      0.72      0.78      1115
weighted avg       0.93      0.92      0.91      1115

Confusion Matrix:
 [[966   0]
 [ 84  65]]




In [None]:
evaluate_model(y_test, y_pred_svm, "Support Vector Machine")

Support Vector Machine Performance:
Accuracy: 0.9910313901345291
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      0.99       966
           1       1.00      0.93      0.97       149

    accuracy                           0.99      1115
   macro avg       0.99      0.97      0.98      1115
weighted avg       0.99      0.99      0.99      1115

Confusion Matrix:
 [[966   0]
 [ 10 139]]


