In [None]:
import numpy as np
import pandas as pd

In [None]:
# Read the e-mail dataset from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='emails.csv')

# Bare last expression: the notebook renders the frame as the cell output.
df

In [None]:
# Remove the 'Email No.' column before modelling.
# NOTE(review): presumably a per-row identifier rather than a feature — confirm against the CSV.
# errors='ignore' keeps this cell idempotent: re-running it after the column has
# already been dropped no longer raises KeyError.
df = df.drop(columns='Email No.', errors='ignore')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC


In [None]:
# Positional split of the frame: columns 0..2999 form the feature matrix X,
# and the single column at position 3000 is the target Y.
# NOTE(review): the 3000 boundary is hard-coded — confirm it matches the CSV layout.
X = df.iloc[:, 0:3000]
Y = df.iloc[:, 3000]

# Show the target column as plain text.
print(Y)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=21,
)

In [None]:
# k-nearest-neighbours classifier with k = 3, fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
KNN_classifier = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)

# Predicted labels for the held-out rows; reused by the metric cells below.
knn_prediction = KNN_classifier.predict(x_test)

In [None]:
# Support-vector classifier with scikit-learn's default settings, fitted on
# the training split. fit() returns the estimator itself, so the calls are chained.
my_svc = SVC().fit(x_train, y_train)

# Predicted labels for the held-out rows; reused by the metric cells below.
svc_prediction = my_svc.predict(x_test)

In [None]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error,confusion_matrix,ConfusionMatrixDisplay

In [None]:
# R2 score of each model's predictions against the held-out labels.
# NOTE(review): R2 is a regression metric applied here to classifier output —
# confirm this is intended.
knn_r2 = r2_score(y_true=y_test, y_pred=knn_prediction)
print("KNN R2: ", knn_r2)

svc_r2 = r2_score(y_true=y_test, y_pred=svc_prediction)
print("SVM R2: ", svc_r2)

In [None]:
# Mean absolute error of each model's predictions against the held-out labels.
knn_MAE = mean_absolute_error(y_true=y_test, y_pred=knn_prediction)
print("KNN MAE: ", knn_MAE)

svc_MAE = mean_absolute_error(y_true=y_test, y_pred=svc_prediction)
print("SVM MAE: ", svc_MAE)

In [None]:
# Mean squared error of each model's predictions against the held-out labels.
knn_MSE = mean_squared_error(y_true=y_test, y_pred=knn_prediction)
svc_MSE = mean_squared_error(y_true=y_test, y_pred=svc_prediction)

print("KNN MSE: ", knn_MSE)
print("SVM MSE: ", svc_MSE)

# RMSE is reported as the square root of the MSE values computed above.
print("KNN RMSE: ", np.sqrt(knn_MSE))
print("SVM RMSE: ", np.sqrt(svc_MSE))

In [None]:
import matplotlib.pyplot as plt

# Confusion matrices of both classifiers on the held-out split.
knn_CM = confusion_matrix(y_test, knn_prediction)
svc_CM = confusion_matrix(y_test, svc_prediction)
knn_plot = ConfusionMatrixDisplay(knn_CM)
svc_plot = ConfusionMatrixDisplay(svc_CM)

# Each plot() call draws on its own new figure and stores the axes in `ax_`.
# Title the figures so the two otherwise identical-looking charts can be told apart.
knn_plot.plot()
knn_plot.ax_.set_title("KNN (k=3) confusion matrix")
svc_plot.plot()
svc_plot.ax_.set_title("SVM confusion matrix")
plt.show()

In [None]:
from sklearn.metrics import accuracy_score

# Classification accuracy of each model on the held-out split.
print("KNN accuracy: ", accuracy_score(y_true=y_test, y_pred=knn_prediction))
print("SVM accuracy: ", accuracy_score(y_true=y_test, y_pred=svc_prediction))

In [None]:
# Same KNN experiment as the k = 3 model, repeated with k = 5 for comparison.
# fit() returns the estimator itself, so construction and fitting are chained.
KNN_classifier_withk5 = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)

# Predicted labels for the held-out rows.
knn_prediction_withk5 = KNN_classifier_withk5.predict(x_test)

In [None]:
# MSE, R2 and MAE for the k = 5 model; collected later into the results table.
knn_k5_MSE = mean_squared_error(y_true=y_test, y_pred=knn_prediction_withk5)
knn_k5_r2 = r2_score(y_true=y_test, y_pred=knn_prediction_withk5)
knn_k5_MAE = mean_absolute_error(y_true=y_test, y_pred=knn_prediction_withk5)

In [None]:


# KNN with k = floor(sqrt(N)), where N is the number of rows in X.
# NOTE(review): N is taken from the full X, not from x_train — confirm that
# the sqrt(N) heuristic is meant to use the whole dataset size.
KNN_classifier_withkrootN = KNeighborsClassifier(
    n_neighbors=int(np.sqrt(len(X))),
).fit(x_train, y_train)

# Predicted labels for the held-out rows.
knn_prediction_withkrootN = KNN_classifier_withkrootN.predict(x_test)

In [None]:
# MSE, R2 and MAE for the k = sqrt(N) model; collected later into the results table.
knn_krootN_MSE = mean_squared_error(y_true=y_test, y_pred=knn_prediction_withkrootN)
knn_krootN_r2 = r2_score(y_true=y_test, y_pred=knn_prediction_withkrootN)
knn_krootN_MAE = mean_absolute_error(y_true=y_test, y_pred=knn_prediction_withkrootN)

In [None]:
# Column layout of the KNN comparison table: the K setting followed by the
# error/fit metrics recorded for it.
cols = [
    'K',
    'RMSE',
    'MSE',
    'R2',
    'MAE',
]

# Start from an empty frame that carries only the column headers.
result_tabulation = pd.DataFrame(columns=cols)

In [None]:
# One Series of metrics per K setting. The keys deliberately match the entries
# of `cols` ('R2', not 'R-Squared') so every metric lands in its declared
# column instead of being dropped or left as NaN.
full_metrics = pd.Series({'K': "3", 'RMSE': np.sqrt(knn_MSE), 'MSE': knn_MSE, 'R2': knn_r2, 'MAE': knn_MAE})
full_metricsk5 = pd.Series({'K': "5", 'RMSE': np.sqrt(knn_k5_MSE), 'MSE': knn_k5_MSE, 'R2': knn_k5_r2, 'MAE': knn_k5_MAE})
full_metricskrootN = pd.Series({'K': "sqrt(N)", 'RMSE': np.sqrt(knn_krootN_MSE), 'MSE': knn_krootN_MSE, 'R2': knn_krootN_r2, 'MAE': knn_krootN_MAE})

# Build the table in one shot: each Series becomes a row, and `columns=cols`
# fixes the column order to the layout declared in the previous cell.
# (Concatenating the Series with axis=1 would instead put the metrics on the
# rows under anonymous 0/1/2 column labels.)
result_tabulation = pd.DataFrame(
    [full_metrics, full_metricsk5, full_metricskrootN],
    columns=cols,
)

# Display the comparison table as the cell output.
result_tabulation