# Setup

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

import numpy as np
import sys

# Add the parent directory to the import search path so that the project-local
# `splearn` package imported in the next cell can be resolved.
# NOTE(review): '../' is relative to the kernel's working directory; this
# presumably assumes the notebook is launched from its own folder — confirm.
sys.path.append('../')

In [2]:
from splearn.neighbors import SimpleKNeighborsClassifier

# Dataset

In [3]:
# Load the Iris dataset and pull out the feature matrix and the label vector.
iris_dataset = datasets.load_iris()
X, y = iris_dataset['data'], iris_dataset['target']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore every result below) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# scikit-learn implementation

In [4]:
# Reference model: scikit-learn's k-nearest-neighbours classifier, configured
# to consider the 5 nearest training samples.
sklearn_clf = KNeighborsClassifier(n_neighbors=5)
# `fit` is deliberately the last expression of the cell, so the notebook
# displays the fitted estimator's repr (with its full parameter set) below.
sklearn_clf.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [5]:
# Report the scikit-learn prediction for every held-out sample.
# Iterating over the test split itself (instead of a fixed row count) keeps
# the loop correct if `test_size` or the dataset ever changes.
for i, (row, true_label) in enumerate(zip(X_test, y_test)):
    # `predict` expects a 2-D array, so the single sample is reshaped to
    # (1, n_features); the result is a length-1 array.
    predicted_y = sklearn_clf.predict(row.reshape(1, -1))
    # Comparing a scalar label with the length-1 prediction array yields a
    # length-1 boolean array, which is what gets interpolated here.
    print("--Examining row %i of test dataset... %s" % (i, true_label == predicted_y))
    print("Data:")
    print(row)
    print("True label:")
    print(true_label)
    print("Predicted label:")
    print(predicted_y)

--Examining row 0 of test dataset... [ True]
Data:
[5.8 2.8 5.1 2.4]
True label:
2
Predicted label:
[2]
--Examining row 1 of test dataset... [ True]
Data:
[6.  2.2 4.  1. ]
True label:
1
Predicted label:
[1]
--Examining row 2 of test dataset... [ True]
Data:
[5.5 4.2 1.4 0.2]
True label:
0
Predicted label:
[0]
--Examining row 3 of test dataset... [ True]
Data:
[7.3 2.9 6.3 1.8]
True label:
2
Predicted label:
[2]
--Examining row 4 of test dataset... [ True]
Data:
[5.  3.4 1.5 0.2]
True label:
0
Predicted label:
[0]
--Examining row 5 of test dataset... [ True]
Data:
[6.3 3.3 6.  2.5]
True label:
2
Predicted label:
[2]
--Examining row 6 of test dataset... [ True]
Data:
[5.  3.5 1.3 0.3]
True label:
0
Predicted label:
[0]
--Examining row 7 of test dataset... [ True]
Data:
[6.7 3.1 4.7 1.5]
True label:
1
Predicted label:
[1]
--Examining row 8 of test dataset... [ True]
Data:
[6.8 2.8 4.8 1.4]
True label:
1
Predicted label:
[1]
--Examining row 9 of test dataset... [ True]
Data:
[6.1 2.8 4.  

# Simple Python implementation

In [6]:
# Project-local classifier imported from `splearn`, built with the same
# n_neighbors=5 setting as the scikit-learn model and trained on the same
# training split so the two can be compared.
my_clf = SimpleKNeighborsClassifier(n_neighbors=5)
my_clf.fit(X_train, y_train)

In [7]:
# Report the SimpleKNeighborsClassifier prediction for every held-out sample.
# Iterating over the test split itself (instead of a fixed row count) keeps
# the loop correct if `test_size` or the dataset ever changes.
for i, (row, true_label) in enumerate(zip(X_test, y_test)):
    # The single sample is passed as a (1, n_features) array, mirroring how
    # the scikit-learn classifier is called elsewhere in this notebook.
    predicted_y = my_clf.predict(row.reshape(1, -1))
    print("--Examining row %i of test dataset... %s" % (i, true_label == predicted_y))
    print("Data:")
    print(row)
    print("True label:")
    print(true_label)
    print("Predicted label:")
    print(predicted_y)

--Examining row 0 of test dataset... [ True]
Data:
[5.8 2.8 5.1 2.4]
True label:
2
Predicted label:
[2]
--Examining row 1 of test dataset... [ True]
Data:
[6.  2.2 4.  1. ]
True label:
1
Predicted label:
[1]
--Examining row 2 of test dataset... [ True]
Data:
[5.5 4.2 1.4 0.2]
True label:
0
Predicted label:
[0]
--Examining row 3 of test dataset... [ True]
Data:
[7.3 2.9 6.3 1.8]
True label:
2
Predicted label:
[2]
--Examining row 4 of test dataset... [ True]
Data:
[5.  3.4 1.5 0.2]
True label:
0
Predicted label:
[0]
--Examining row 5 of test dataset... [ True]
Data:
[6.3 3.3 6.  2.5]
True label:
2
Predicted label:
[2]
--Examining row 6 of test dataset... [ True]
Data:
[5.  3.5 1.3 0.3]
True label:
0
Predicted label:
[0]
--Examining row 7 of test dataset... [ True]
Data:
[6.7 3.1 4.7 1.5]
True label:
1
Predicted label:
[1]
--Examining row 8 of test dataset... [ True]
Data:
[6.8 2.8 4.8 1.4]
True label:
1
Predicted label:
[1]
--Examining row 9 of test dataset... [ True]
Data:
[6.1 2.8 4.  

# Do the results agree?

In [8]:
# Rebuild and refit both classifiers with identical settings (n_neighbors=5)
# on the same training split, so the comparison below starts from freshly
# fitted models rather than reusing the objects from the earlier cells.
sklearn_clf = KNeighborsClassifier(n_neighbors=5)
sklearn_clf.fit(X_train, y_train)

my_clf = SimpleKNeighborsClassifier(n_neighbors=5)
my_clf.fit(X_train, y_train)

In [9]:
# Check, sample by sample, whether the two implementations predict the same
# label. The loop covers the whole test split rather than a fixed row count,
# so it stays correct if `test_size` or the dataset changes.
for i, row in enumerate(X_test):
    # Both classifiers receive the same (1, n_features) view of the sample.
    sample = row.reshape(1, -1)
    sklearn_predicted_y = sklearn_clf.predict(sample)
    my_predicted_y = my_clf.predict(sample)
    print("--Examining row %i of test dataset... %s" % (i, sklearn_predicted_y == my_predicted_y))

--Examining row 0 of test dataset... [ True]
--Examining row 1 of test dataset... [ True]
--Examining row 2 of test dataset... [ True]
--Examining row 3 of test dataset... [ True]
--Examining row 4 of test dataset... [ True]
--Examining row 5 of test dataset... [ True]
--Examining row 6 of test dataset... [ True]
--Examining row 7 of test dataset... [ True]
--Examining row 8 of test dataset... [ True]
--Examining row 9 of test dataset... [ True]
--Examining row 10 of test dataset... [ True]
--Examining row 11 of test dataset... [ True]
--Examining row 12 of test dataset... [ True]
--Examining row 13 of test dataset... [ True]
--Examining row 14 of test dataset... [ True]
--Examining row 15 of test dataset... [ True]
--Examining row 16 of test dataset... [ True]
--Examining row 17 of test dataset... [ True]
--Examining row 18 of test dataset... [ True]
--Examining row 19 of test dataset... [ True]
--Examining row 20 of test dataset... [ True]
--Examining row 21 of test dataset... [ True