In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
dataset = pd.read_csv("emails.csv")
print("Dataset loaded successfully!")
print(dataset.head())

Dataset loaded successfully!
  Email No.  the  to  ect  and  for  of    a  you  hou  ...  connevey  jay  \
0   Email 1    0   0    1    0    0   0    2    0    0  ...         0    0   
1   Email 2    8  13   24    6    6   2  102    1   27  ...         0    0   
2   Email 3    0   0    1    0    0   0    8    0    0  ...         0    0   
3   Email 4    0   5   22    0    5   1   51    2   10  ...         0    0   
4   Email 5    7   6   17    1    5   2   57    0    9  ...         0    0   

   valued  lay  infrastructure  military  allowing  ff  dry  Prediction  
0       0    0               0         0         0   0    0           0  
1       0    0               0         0         0   1    0           0  
2       0    0               0         0         0   0    0           0  
3       0    0               0         0         0   0    0           0  
4       0    0               0         0         0   1    0           0  

[5 rows x 3002 columns]


In [4]:
dataset = dataset.drop(columns=['Email No.'])

In [5]:
X = dataset.drop('spam', axis=1)   # Features
y = dataset['spam']                # Target

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [7]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

In [9]:
svm = SVC(kernel='linear')
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)

In [10]:
print("\n---- KNN Model Evaluation ----")
print("Accuracy:", accuracy_score(y_test, y_pred_knn))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_knn))
print("Classification Report:\n", classification_report(y_test, y_pred_knn))

print("\n---- SVM Model Evaluation ----")
print("Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
print("Classification Report:\n", classification_report(y_test, y_pred_svm))


---- KNN Model Evaluation ----
Accuracy: 0.9945862335653519
Confusion Matrix:
 [[1275    0    0]
 [   6   11    0]
 [   1    0    0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      1.00      1.00      1275
           1       1.00      0.65      0.79        17
           3       0.00      0.00      0.00         1

    accuracy                           0.99      1293
   macro avg       0.66      0.55      0.59      1293
weighted avg       0.99      0.99      0.99      1293


---- SVM Model Evaluation ----
Accuracy: 0.9876256767208044
Confusion Matrix:
 [[1266    5    3    0    1]
 [   6   11    0    0    0]
 [   0    0    0    0    0]
 [   0    0    1    0    0]
 [   0    0    0    0    0]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99      1275
           1       0.69      0.65      0.67        17
           2       0.00      0.00      0.00         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
acc_knn = accuracy_score(y_test, y_pred_knn)
acc_svm = accuracy_score(y_test, y_pred_svm)

print("\n---- Final Comparison ----")
print(f"KNN Accuracy: {acc_knn:.4f}")
print(f"SVM Accuracy: {acc_svm:.4f}")

if acc_knn > acc_svm:
    print("KNN performed better on this dataset.")
else:
    print("SVM performed better on this dataset.")


---- Final Comparison ----
KNN Accuracy: 0.9946
SVM Accuracy: 0.9876
KNN performed better on this dataset.
