In [93]:
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
import pandas as pd
import numpy as np

In [71]:
data = pd.read_csv("base_crabs.csv")
X = np.array(data.drop("class",axis=1))
y = np.array(data["class"])
target_names = np.array(["B","O"])

In [72]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=0)

In [73]:
pca = PCA(n_components=2).fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

In [97]:
for n in range(1,10,2):
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    print("\nKNN classifier with %d neighbors" % (n))
    print("Cross-Validation (10-fold) score: %f" % (cross_val_score(clf, X_train, y_train, cv=10).mean()))
    print(classification_report(y_test,y_pred,target_names=target_names))
    print(confusion_matrix(y_test,y_pred, labels=range(2)))
    print("Accuracy score: %f" % (accuracy_score(y_test,y_pred)))
    print("ROC auc score: %f" % (roc_auc_score(y_test,y_pred)))


KNN classifier with 1 neighbors
Cross-Validation (10-fold) score: 0.907070
             precision    recall  f1-score   support

          B       0.94      1.00      0.97        29
          O       1.00      0.94      0.97        31

avg / total       0.97      0.97      0.97        60

[[29  0]
 [ 2 29]]
Accuracy score: 0.966667
ROC auc score: 0.967742

KNN classifier with 3 neighbors
Cross-Validation (10-fold) score: 0.908095
             precision    recall  f1-score   support

          B       0.93      0.97      0.95        29
          O       0.97      0.94      0.95        31

avg / total       0.95      0.95      0.95        60

[[28  1]
 [ 2 29]]
Accuracy score: 0.950000
ROC auc score: 0.950501

KNN classifier with 5 neighbors
Cross-Validation (10-fold) score: 0.850403
             precision    recall  f1-score   support

          B       0.90      0.97      0.93        29
          O       0.97      0.90      0.93        31

avg / total       0.94      0.93      0.93   

In [96]:
for n in range(1,10,2):
    clf = KNeighborsClassifier(n_neighbors=n)
    clf.fit(X_train_pca,y_train)
    y_pred = clf.predict(X_test_pca)
    print("\n(PCA-version) KNN classifier with %d neighbors" % (n))
    print("Cross-Validation (10-fold) score: %f" % (cross_val_score(clf, X_train_pca, y_train, cv=10).mean()))
    print(classification_report(y_test,y_pred,target_names=target_names))
    print(confusion_matrix(y_test,y_pred, labels=range(2)))
    print("Accuracy score: %f" % (accuracy_score(y_test,y_pred)))
    print("ROC auc score: %f" % (roc_auc_score(y_test,y_pred)))


(PCA-version) KNN classifier with 1 neighbors
Cross-Validation (10-fold) score: 0.742637
             precision    recall  f1-score   support

          B       0.79      0.90      0.84        29
          O       0.89      0.77      0.83        31

avg / total       0.84      0.83      0.83        60

[[26  3]
 [ 7 24]]
Accuracy score: 0.833333
ROC auc score: 0.835373

(PCA-version) KNN classifier with 3 neighbors
Cross-Validation (10-fold) score: 0.750330
             precision    recall  f1-score   support

          B       0.81      0.90      0.85        29
          O       0.89      0.81      0.85        31

avg / total       0.85      0.85      0.85        60

[[26  3]
 [ 6 25]]
Accuracy score: 0.850000
ROC auc score: 0.851502

(PCA-version) KNN classifier with 5 neighbors
Cross-Validation (10-fold) score: 0.771282
             precision    recall  f1-score   support

          B       0.86      0.86      0.86        29
          O       0.87      0.87      0.87        31

avg