In [11]:
import sys
import numpy as np
sys.path.append("..")
from data_processing.train_test_split import train_test_split
from models import knn
import models.classification_tree as tree

In [12]:
# Read the comma-separated banknote authentication table into a NumPy array.
banknote_data = np.genfromtxt(
    '../datasets/data_banknote_authentication.txt',
    delimiter=',',
)

# Every column except the last is a feature; the last column is the label.
X, y = banknote_data[:, :-1], banknote_data[:, -1]


In [13]:
# Partition the data into train and test sets. seed=2108 is passed so the
# split is presumably reproducible across runs (project helper; confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=2108)

# Show the shape of each feature partition, one per line.
print(X_train.shape, X_test.shape, sep="\n")

(1029, 4)
(343, 4)


In [14]:
# Instantiate the two classifiers that the rest of the notebook compares.
knn_banknote = knn.KNearestNeighbours(k=10)  # k-nearest-neighbours with k = 10
tree_banknote = tree.ClassificationTree()  # classification tree, constructor defaults

In [15]:
# Fit both classifiers on the training partition.
knn_banknote.fit(X_train, y_train)
tree_banknote.fit(X_train, y_train)
# Print the fitted tree: each internal node is a test "X[i] <= threshold"
# with Left/Right branches, and each leaf is a predicted class.
tree_banknote.print_tree()

X[0] <= 0.3223
Left:
|   X[1] <= 5.9781
|   Left:
|   |   X[2] <= 6.2204
|   |   Left:
|   |   |   X[0] <= -0.36038
|   |   |   Left:
|   |   |   |   --> Class: 1.0
|   |   |   Right:
|   |   |   |   X[2] <= 3.1143
|   |   |   |   Left:
|   |   |   |   |   --> Class: 1.0
|   |   |   |   Right:
|   |   |   |   |   --> Class: 0.0
|   |   Right:
|   |   |   X[1] <= -4.6062
|   |   |   Left:
|   |   |   |   --> Class: 1.0
|   |   |   Right:
|   |   |   |   X[0] <= -1.7344
|   |   |   |   Left:
|   |   |   |   |   --> Class: 1.0
|   |   |   |   Right:
|   |   |   |   |   --> Class: 0.0
|   Right:
|   |   X[0] <= -2.7419
|   |   Left:
|   |   |   X[2] <= 3.1392
|   |   |   Left:
|   |   |   |   --> Class: 1.0
|   |   |   Right:
|   |   |   |   --> Class: 0.0
|   |   Right:
|   |   |   --> Class: 0.0
Right:
|   X[2] <= -4.3839
|   Left:
|   |   X[0] <= 4.2164
|   |   Left:
|   |   |   --> Class: 1.0
|   |   Right:
|   |   |   --> Class: 0.0
|   Right:
|   |   X[0] <= 1.594
|   |   Left:
|   |

In [16]:
# Score each fitted model on the held-out test partition. Accuracy is the
# fraction of predictions that equal the true label.
knn_pred = knn_banknote.predict(X_test)
knn_acc = np.mean(y_test == knn_pred)

tree_pred = tree_banknote.predict(X_test)
tree_acc = np.mean(y_test == tree_pred)

print("KNN (k = 10) Accuracy:", knn_acc)
print("Tree Accuracy:", tree_acc)

KNN (k = 10) Accuracy: 1.0
Tree Accuracy: 0.9708454810495627


In [17]:
from data_processing import cross_validation as cv

# Separate estimator instances dedicated to the cross-validation runs.
knn_bn_kfolds = knn.KNearestNeighbours(10)
tree_bn_kfolds = tree.ClassificationTree()

# --- KNN: 5-fold, 10-fold and leave-one-out on the training partition ---
knn_5fold_acc = cv.k_folds_accuracy_score(knn_bn_kfolds, X_train, y_train, k=5, seed=2108)
print("5-Folds - KNN - Mean Accuracy:", knn_5fold_acc)

knn_10fold_acc = cv.k_folds_accuracy_score(knn_bn_kfolds, X_train, y_train, k=10, seed=2108)
print("10-Folds - KNN - Mean Accuracy:", knn_10fold_acc)

knn_loo_acc = cv.leave_one_out_score(knn_bn_kfolds, X_train, y_train, seed=2108)
print("Leave-One-Out - KNN:", knn_loo_acc)

# --- Classification tree: 5-fold and 10-fold on the training partition ---
tree_5fold_acc = cv.k_folds_accuracy_score(tree_bn_kfolds, X_train, y_train, k=5, seed=2108)
print("5-Folds - Tree - Mean Accuracy:", tree_5fold_acc)

tree_10fold_acc = cv.k_folds_accuracy_score(tree_bn_kfolds, X_train, y_train, k=10, seed=2108)
print("10-Folds - Tree - Mean Accuracy:", tree_10fold_acc)


5-Folds - KNN - Mean Accuracy: 0.9990243902439024
10-Folds - KNN - Mean Accuracy: 0.9990196078431373
Leave-One-Out - KNN: 0.9990281827016521
5-Folds - Tree - Mean Accuracy: 0.9766600536818766
10-Folds - Tree - Mean Accuracy: 0.9707472178060412


In [18]:
# Leave-one-out cross-validation of the classification tree on the training
# partition. The label names the tree because the estimator being scored is
# tree_bn_kfolds, not the KNN model.
print("Leave-One-Out - Tree:", cv.leave_one_out_score(tree_bn_kfolds, X_train, y_train, seed=2108))

In [None]:
%%timeit -r 1 -n 1
# Time a single pass (-r 1 -n 1) of re-splitting the data with a different
# seed and running 5-/10-fold cross-validation for both models.
# NOTE(review): names assigned inside a %%timeit cell do not appear to reach
# later cells (the per-fold scores printed in the next cell match the
# seed=2108 split, not this seed=2200 one) - confirm before relying on it.
X_train, X_test, y_train, y_test = train_test_split(X,y,seed=2200)

# Banknote authentication dataset - 1372 samples in total (1029 train + 343
# test per the shapes printed earlier); the scores below use X_train/y_train only.
print("5-Folds - KNN - Mean Accuracy:", cv.k_folds_accuracy_score(knn_bn_kfolds, X_train, y_train, k=5, seed=2108))
print("10-Folds - KNN - Mean Accuracy:", cv.k_folds_accuracy_score(knn_bn_kfolds, X_train, y_train, k=10, seed=2108))
print("5-Folds - Tree - Mean Accuracy:", cv.k_folds_accuracy_score(tree_bn_kfolds, X_train, y_train, k=5, seed=2108))
print("10-Folds - Tree - Mean Accuracy:", cv.k_folds_accuracy_score(tree_bn_kfolds, X_train, y_train, k=10, seed=2108))

5-Folds - KNN - Mean Accuracy: 0.9970731707317073
10-Folds - KNN - Mean Accuracy: 0.9970588235294118
5-Folds - Tree - Mean Accuracy: 0.9815567744194189
10-Folds - Tree - Mean Accuracy: 0.9767885532591416
15.4 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [None]:
# Per-fold accuracies (rather than only their mean) for 10-fold CV of the KNN model.
knn_fold_accuracies = cv.k_folds_accuracy_scores(knn_bn_kfolds, X_train, y_train, k=10, seed=2108)
print("10-Folds - KNN - All Accuracies:", knn_fold_accuracies)


10-Folds - KNN - All Accuracies: [1.0, 1.0, 1.0, 0.9901960784313726, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
