In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

# Read the e-mail table from 'emails.csv' (relative path, resolved against the working directory)
email_data = pd.read_csv(filepath_or_buffer='emails.csv')

# Preview the first five rows as this cell's output
email_data.head(n=5)

Unnamed: 0,Email No.,the,to,ect,and,for,of,a,you,hou,...,connevey,jay,valued,lay,infrastructure,military,allowing,ff,dry,Prediction
0,Email 1,0,0,1,0,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Email 2,8,13,24,6,6,2,102,1,27,...,0,0,0,0,0,0,0,1,0,0
2,Email 3,0,0,1,0,0,0,8,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Email 4,0,5,22,0,5,1,51,2,10,...,0,0,0,0,0,0,0,0,0,0
4,Email 5,7,6,17,1,5,2,57,0,9,...,0,0,0,0,0,0,0,1,0,0


In [7]:
# Build the model inputs as plain NumPy arrays.
# Features: every column except the e-mail identifier and the label column.
X = email_data.drop(['Email No.', 'Prediction'], axis=1).to_numpy()
# Target: the 'Prediction' column on its own.
y = email_data['Prediction'].to_numpy()

In [10]:
# Reserve 20% of the samples as a held-out test set; the fixed random_state seeds the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Create the two (still unfitted) classifiers: a linear-kernel SVM and a 5-neighbour KNN
svm = SVC(C=1, kernel='linear')
knn = KNeighborsClassifier(n_neighbors=5)

In [13]:
# Fit each classifier on the training split, then label the held-out samples.
# fit() returns the estimator itself, so the predict() call can be chained onto it.
y_pred_knn = knn.fit(X_train, y_train).predict(X_test)
y_pred_svm = svm.fit(X_train, y_train).predict(X_test)

In [16]:
# Calculate performance metrics for both models (printing happens in a later cell)
def _evaluate_predictions(y_true, y_pred):
    """Summarise one model's predictions against the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model, in the same order as ``y_true``.

    Returns
    -------
    dict
        Keys ``"confusion_matrix"``, ``"accuracy"``, ``"precision"`` and
        ``"recall"``, each holding the result of the corresponding
        scikit-learn metric function called as ``metric(y_true, y_pred)``.
    """
    return {
        "confusion_matrix": confusion_matrix(y_true, y_pred),
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": precision_score(y_true, y_pred),
        "recall": recall_score(y_true, y_pred),
    }


# Score every model through the same helper so the metric set cannot drift
# between models; insertion order (KNN, then SVM) is the order they are reported in.
metrics = {
    "KNN": _evaluate_predictions(y_test, y_pred_knn),
    "SVM": _evaluate_predictions(y_test, y_pred_svm),
}


In [19]:
# Print one text report per model: confusion matrix first, then the three
# scalar scores rounded to two decimals.
for model, scores in metrics.items():
    report_lines = [
        f"{model} Model:",
        f"Confusion Matrix:\n{scores['confusion_matrix']}",
        f"Accuracy: {scores['accuracy']:.2f}",
        f"Precision: {scores['precision']:.2f}",
        f"Recall: {scores['recall']:.2f}\n",  # trailing newline leaves a blank line between models
    ]
    print("\n".join(report_lines))

KNN Model:
Confusion Matrix:
[[645  94]
 [ 48 248]]
Accuracy: 0.86
Precision: 0.73
Recall: 0.84

SVM Model:
Confusion Matrix:
[[715  24]
 [ 18 278]]
Accuracy: 0.96
Precision: 0.92
Recall: 0.94

