In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load the dataset

df = pd.read_csv("emails.csv")

# Pre-process the dataset
# Assuming the dataset has 'text' and 'spam' columns
X = df['text']
y = df['spam']

# Convert text data to numerical data using TF-IDF Vectorizer
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Implement K-Nearest Neighbors
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)

# Implement Support Vector Machine
svm = SVC(kernel='linear')
svm.fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

# Evaluate the models
def evaluate_model(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    return accuracy, precision, recall, f1

accuracy_knn, precision_knn, recall_knn, f1_knn = evaluate_model(y_test, y_pred_knn)
accuracy_svm, precision_svm, recall_svm, f1_svm = evaluate_model(y_test, y_pred_svm)

print("K-Nearest Neighbors Performance:")
print(f"Accuracy: {accuracy_knn}")
print(f"Precision: {precision_knn}")
print(f"Recall: {recall_knn}")
print(f"F1 Score: {f1_knn}")

print("\nSupport Vector Machine Performance:")
print(f"Accuracy: {accuracy_svm}")
print(f"Precision: {precision_svm}")
print(f"Recall: {recall_svm}")
print(f"F1 Score: {f1_svm}")

# Compare the models
models = ['K-Nearest Neighbors', 'Support Vector Machine']
accuracy_scores = [accuracy_knn, accuracy_svm]
precision_scores = [precision_knn, precision_svm]
recall_scores = [recall_knn, recall_svm]
f1_scores = [f1_knn, f1_svm]

import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(12, 8))
plt.subplot(2, 2, 1)
sns.barplot(x=models, y=accuracy_scores)
plt.title('Accuracy Comparison')
plt.subplot(2, 2, 2)
sns.barplot(x=models, y=precision_scores)
plt.title('Precision Comparison')
plt.subplot(2, 2, 3)
sns.barplot(x=models, y=recall_scores)
plt.title('Recall Comparison')
plt.subplot(2, 2, 4)
sns.barplot(x=models, y=f1_scores)
plt.title('F1 Score Comparison')
plt.tight_layout()
plt.show()

AttributeError: 'int' object has no attribute 'lower'