In [6]:
# Import libraries
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [7]:
# Step 1: Load Dataset
# -----------------------------------------------------------
# The CSV location can be overridden through the EMAIL_CSV_PATH environment
# variable so the notebook is not tied to one machine's directory layout.
# When the variable is unset, the original path is used unchanged.
DATA_PATH = os.environ.get(
    "EMAIL_CSV_PATH",
    r"C:\Users\TEJASWINI CHANDARGI\ML\ML18dataset\email.csv",
)
df = pd.read_csv(DATA_PATH)

# -----------------------------------------------------------
# Step 2: Separate Input (X) and Output (y)
# -----------------------------------------------------------
# Features are every column except:
#   - "Email No."  : dropped because it is not a feature
#   - "Prediction" : the label column, which must not appear among the inputs
X = df.drop(columns=['Email No.', 'Prediction'])

# Label column
y = df['Prediction']

In [8]:
# Step 3: Train-Test Split
# -----------------------------------------------------------
# Hold out 25% of the rows for evaluation (test_size=0.25). random_state is
# fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)


In [9]:
# Step 4: K-Nearest Neighbors
# -----------------------------------------------------------
# Fit a 5-neighbour classifier on the training split, then predict labels
# for the held-out test split. fit() returns the estimator itself, so the
# construction and training can be chained.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
knn_pred = knn.predict(X_test)

# Compute every metric up front, then print them together.
knn_accuracy = accuracy_score(y_test, knn_pred)
knn_matrix = confusion_matrix(y_test, knn_pred)
knn_report = classification_report(y_test, knn_pred)

print("\n===== K-NEAREST NEIGHBORS RESULTS =====")
print("Accuracy:", knn_accuracy)
print("Confusion Matrix:\n", knn_matrix)
print("\nClassification Report:\n", knn_report)


===== K-NEAREST NEIGHBORS RESULTS =====
Accuracy: 0.8631090487238979
Confusion Matrix:
 [[795 118]
 [ 59 321]]

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.87      0.90       913
           1       0.73      0.84      0.78       380

    accuracy                           0.86      1293
   macro avg       0.83      0.86      0.84      1293
weighted avg       0.87      0.86      0.87      1293



In [10]:
# Step 5: Support Vector Machine (SVM)
# -----------------------------------------------------------
# Fit a linear-kernel support vector classifier on the training split, then
# predict labels for the held-out test split. fit() returns the estimator
# itself, so the construction and training can be chained.
svm = SVC(kernel='linear').fit(X_train, y_train)
svm_pred = svm.predict(X_test)

# Compute every metric up front, then print them together.
svm_accuracy = accuracy_score(y_test, svm_pred)
svm_matrix = confusion_matrix(y_test, svm_pred)
svm_report = classification_report(y_test, svm_pred)

print("\n===== SUPPORT VECTOR MACHINE RESULTS =====")
print("Accuracy:", svm_accuracy)
print("Confusion Matrix:\n", svm_matrix)
print("\nClassification Report:\n", svm_report)


===== SUPPORT VECTOR MACHINE RESULTS =====
Accuracy: 0.9559164733178654
Confusion Matrix:
 [[882  31]
 [ 26 354]]

Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97       913
           1       0.92      0.93      0.93       380

    accuracy                           0.96      1293
   macro avg       0.95      0.95      0.95      1293
weighted avg       0.96      0.96      0.96      1293



In [11]:
# Step 6: Performance Comparison
# -----------------------------------------------------------
# Score each model once and reuse the values for both the printout and the
# verdict, so the reported numbers and the conclusion always agree.
knn_acc = accuracy_score(y_test, knn_pred)
svm_acc = accuracy_score(y_test, svm_pred)

print("\n================ FINAL COMPARISON ================")
print("KNN Accuracy:", knn_acc)
print("SVM Accuracy:", svm_acc)

# Three-way verdict: a plain if/else would declare KNN the winner whenever
# the two accuracies are equal, which is wrong; report a tie instead.
if svm_acc > knn_acc:
    print("\n✅ Result: **SVM performs better than KNN** for this dataset.")
elif knn_acc > svm_acc:
    print("\n✅ Result: **KNN performs better than SVM** for this dataset.")
else:
    print("\n✅ Result: **KNN and SVM perform equally well** for this dataset.")


KNN Accuracy: 0.8631090487238979
SVM Accuracy: 0.9559164733178654

✅ Result: **SVM performs better than KNN** for this dataset.
