In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [3]:

# Read the e-mail dataset from disk into a DataFrame.
# The path is relative to the notebook's working directory; change it to
# wherever your copy of the CSV lives.
emails_csv_path = 'emails.csv'
emails_df = pd.read_csv(emails_csv_path)

In [4]:

# Build the target and the model inputs from the raw frame.
# 'Prediction' is the label column. 'Email No.' is excluded from the features
# too (presumably a row identifier rather than a signal -- verify against the CSV).
label_column = 'Prediction'
y = emails_df[label_column]  # Labels
X = emails_df.drop(columns=['Email No.', label_column])  # Features

In [5]:

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [6]:

# Standardise the features for the KNN and SVM models trained later on.
# The scaler's statistics are learned from the training rows only and then
# applied unchanged to both splits, so nothing from the test set leaks in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:

# Reduce dimensionality with PCA
# Project the scaled features onto 50 principal components for efficiency.
# The projection is fitted on the training split only and then reused,
# unchanged, for the test split.
#
# random_state is pinned (same seed as the train/test split) because PCA's
# default svd_solver='auto' can select the randomized SVD solver on large
# inputs; without a seed the components -- and every score computed from
# them -- may differ from one run to the next.
pca = PCA(n_components=50, random_state=42)
X_train_reduced = pca.fit_transform(X_train_scaled)
X_test_reduced = pca.transform(X_test_scaled)


In [8]:

# Create the two (still unfitted) models to compare:
# a linear-kernel support vector machine and a 5-nearest-neighbours classifier.
svm = SVC(kernel='linear')
knn = KNeighborsClassifier(n_neighbors=5)


In [9]:

# Fit KNN on the PCA-reduced training data and predict the held-out set
# in one chain (fit() hands back the estimator itself).
y_pred_knn = knn.fit(X_train_reduced, y_train).predict(X_test_reduced)

# Score the predictions against the true test labels.
knn_report = classification_report(y_true=y_test, y_pred=y_pred_knn)
knn_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_knn)


In [10]:

# Each tuple is one print() call: a heading, then the accuracy, then the
# per-class report (label and value are separated by print's default space).
knn_summary_lines = [
    ("K-Nearest Neighbors (KNN) Performance:",),
    ("Accuracy:", knn_accuracy),
    ("Classification Report:\n", knn_report),
]
for line_parts in knn_summary_lines:
    print(*line_parts)


K-Nearest Neighbors (KNN) Performance:
Accuracy: 0.9565217391304348
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.96      0.97       739
           1       0.90      0.96      0.93       296

    accuracy                           0.96      1035
   macro avg       0.94      0.96      0.95      1035
weighted avg       0.96      0.96      0.96      1035



In [11]:

# Fit the SVM on the PCA-reduced training data and predict the held-out set
# in one chain (fit() hands back the estimator itself).
y_pred_svm = svm.fit(X_train_reduced, y_train).predict(X_test_reduced)

# Score the predictions against the true test labels.
svm_report = classification_report(y_true=y_test, y_pred=y_pred_svm)
svm_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svm)


In [12]:

# Each tuple is one print() call: a heading (preceded by a blank line), then
# the accuracy, then the per-class report.
svm_summary_lines = [
    ("\nSupport Vector Machine (SVM) Performance:",),
    ("Accuracy:", svm_accuracy),
    ("Classification Report:\n", svm_report),
]
for line_parts in svm_summary_lines:
    print(*line_parts)



Support Vector Machine (SVM) Performance:
Accuracy: 0.9603864734299516
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.97      0.97       739
           1       0.93      0.93      0.93       296

    accuracy                           0.96      1035
   macro avg       0.95      0.95      0.95      1035
weighted avg       0.96      0.96      0.96      1035

