In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, confusion_matrix, accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Read the email dataset from disk (point this at your own copy if it lives elsewhere)
df = pd.read_csv('emails.csv')

# Labels (0 - Not Spam, 1 - Spam)
y = df['Prediction']
# Features: everything except the row identifier and the label column
X = df.drop(['Email No.', 'Prediction'], axis=1)

# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features for both KNN and SVM.
# The scaler is fitted on the training rows only and then applied,
# with those same fitted statistics, to the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

### K-NEAREST NEIGHBORS (KNN) ###

# Neighbour count used when no value can be read from the prompt (blank answer,
# closed stdin, or a non-interactive run such as Restart & Run All under
# nbconvert). 5 is the value entered in the recorded run of this notebook.
DEFAULT_K = 5

# Ask the user to input the value of k
try:
    k_text = input("Enter the value of k for K-Nearest Neighbors: ").strip()
except (EOFError, NotImplementedError):
    # EOFError: stdin is closed. NotImplementedError: the Jupyter kernel raises
    # its StdinNotImplementedError subclass when stdin requests are disabled.
    k_text = ""

if k_text:
    try:
        k = int(k_text)
    except ValueError:
        raise ValueError(f"k must be a whole number, got {k_text!r}") from None
else:
    k = DEFAULT_K
    print(f"No value entered for k; using default k={k}")

# Fail early with a clear message instead of a late error from fit/predict:
# k must be positive and cannot exceed the number of training samples.
if not 1 <= k <= len(X_train):
    raise ValueError(
        f"k must be between 1 and {len(X_train)} (number of training samples), got {k}"
    )

# Initialize KNN classifier with the validated k value
knn = KNeighborsClassifier(n_neighbors=k)

# Train the KNN model
knn.fit(X_train, y_train)

# Predict using KNN
y_pred_knn = knn.predict(X_test)

# Evaluate the KNN model
knn_mse = mean_squared_error(y_test, y_pred_knn)  # computed once, reused for RMSE

print("\nKNN Model Performance:")
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
# NOTE: R², MSE, RMSE and MAE are regression metrics, kept so the output stays
# comparable with earlier runs. For 0/1 labels every error has magnitude 1, so
# MSE and MAE both equal the misclassification rate (1 - accuracy).
print("KNN R²:", r2_score(y_test, y_pred_knn))
print("KNN MSE:", knn_mse)
print("KNN RMSE:", np.sqrt(knn_mse))
print("KNN MAE:", mean_absolute_error(y_test, y_pred_knn))
print("KNN Confusion Matrix:\n", confusion_matrix(y_test, y_pred_knn))
# Per-class precision / recall / F1: the metrics that actually describe a classifier
print("KNN Classification Report:\n", classification_report(y_test, y_pred_knn))


### SUPPORT VECTOR MACHINE (SVM) ###

# Initialize SVM classifier with a linear kernel
svm = SVC(kernel='linear')

# Train the SVM model on the standardized training split
svm.fit(X_train, y_train)

# Predict using SVM
y_pred_svm = svm.predict(X_test)

# Evaluate the SVM model
svm_mse = mean_squared_error(y_test, y_pred_svm)  # computed once, reused for RMSE

print("\nSVM Model Performance:")
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
# NOTE: R², MSE, RMSE and MAE are regression metrics, kept so the output stays
# comparable with earlier runs. For 0/1 labels every error has magnitude 1, so
# MSE and MAE both equal the misclassification rate (1 - accuracy).
print("SVM R²:", r2_score(y_test, y_pred_svm))
print("SVM MSE:", svm_mse)
print("SVM RMSE:", np.sqrt(svm_mse))
print("SVM MAE:", mean_absolute_error(y_test, y_pred_svm))
print("SVM Confusion Matrix:\n", confusion_matrix(y_test, y_pred_svm))
# Per-class precision / recall / F1: the metrics that actually describe a classifier
print("SVM Classification Report:\n", classification_report(y_test, y_pred_svm))


Enter the value of k for K-Nearest Neighbors:  5



KNN Model Performance:
KNN Accuracy: 0.8253865979381443
KNN R²: 0.15735822973744573
KNN MSE: 0.17461340206185566
KNN RMSE: 0.41786768487387926
KNN MAE: 0.17461340206185566
KNN Confusion Matrix:
 [[846 251]
 [ 20 435]]

SVM Model Performance:
SVM Accuracy: 0.9400773195876289
SVM R²: 0.710827731976319
SVM MSE: 0.059922680412371136
SVM RMSE: 0.2447910954515526
SVM MAE: 0.059922680412371136
SVM Confusion Matrix:
 [[1043   54]
 [  39  416]]
