# KNN, SVM, ANNs
## Prepare the dataset

In [ ]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the e-mail dataset from disk and show its first rows.
df = pd.read_csv('data/emails.csv')
print(df.head().to_string())

# Summary statistics for every column (numeric and non-numeric alike).
print(df.describe(include='all').to_string())

# Remove the email number column in place so it is not used as a feature.
df.drop(columns=['Email No.'], inplace=True)

# Features (input) are every remaining column except the label;
# the target (output, label) is the 'Prediction' column.
X = df.drop(columns='Prediction')
y = df['Prediction']

# Hold out 20% of the rows for testing; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## KNN

In [ ]:
# Train a k-nearest-neighbours classifier (k = 3) on the training split.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Predict on the held-out test split and report the accuracy.
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("KNN Accuracy: ", accuracy)

## SVM

In [ ]:
# Train a support vector classifier with a linear kernel (C = 1.0) on the training split.
from sklearn.svm import SVC
svm = SVC(kernel='linear', C=1.0, random_state=0).fit(X_train, y_train)

# Predict on the held-out test split and report the accuracy.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("SVM Accuracy: ", accuracy)

## ANNs

In [ ]:
# ANN classifier: a multi-layer perceptron with three hidden layers of 8 units
# each, logistic activation, trained with mini-batch SGD (batch size 128,
# initial learning rate 0.001, max_iter=500).
from sklearn.neural_network import MLPClassifier
# random_state is fixed so that weight initialisation and SGD batch sampling
# are reproducible from run to run, consistent with the seeded train/test
# split and the seeded SVC used elsewhere in this notebook.
# NOTE(review): scikit-learn's documentation recommends scaling features
# before fitting an MLP; the inputs here are passed unscaled - confirm whether
# a scaler should be added.
ann = MLPClassifier(hidden_layer_sizes=(8, 8, 8), activation='logistic', solver='sgd',
                    learning_rate_init=0.001, batch_size=128, max_iter=500,
                    random_state=0)
ann.fit(X_train, y_train)

# Predict on the held-out test split and report the accuracy.
y_pred = ann.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("ANNs Accuracy: ", accuracy)