In [1]:
# Author: Roi Yehoshua <roiyeho@gmail.com>
# Date: January 2014
# License: MIT

In [2]:
import numpy as np

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils.multiclass import unique_labels
from sklearn.metrics import euclidean_distances

In [3]:
class NearestNeighborClassifier(BaseEstimator, ClassifierMixin):
    """A simple 1-nearest neighbor classifier.

    Each query sample is assigned the label of the training sample that is
    closest to it in Euclidean distance. The whole training set is stored
    by ``fit``; there are no hyperparameters.

    Attributes
    ----------
    classes_ : ndarray
        Unique class labels found in ``y`` during ``fit``.
    n_features_in_ : int
        Number of feature columns in the training data seen by ``fit``.
    X_ : ndarray of shape (n_samples, n_features)
        Validated training samples.
    y_ : ndarray of shape (n_samples,)
        Validated training labels, aligned row-by-row with ``X_``.
    """

    def __init__(self):
        pass  # No hyperparameters

    def fit(self, X, y):
        """Fit the classifier to the training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples.
        y : array-like of shape (n_samples,)
            Class label of each training sample.

        Returns
        -------
        self : NearestNeighborClassifier
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` fail the ``check_X_y`` validation.
        """
        X, y = check_X_y(X, y)
        self.classes_ = unique_labels(y)

        # Follow the scikit-learn estimator convention of recording the
        # width of the training data.
        self.n_features_in_ = X.shape[1]

        # 1-NN is a lazy learner: "training" is just memorising the data.
        self.X_ = X
        self.y_ = y
        return self

    def predict(self, X):
        """Perform classification on an array of test vectors X.

        Parameters
        ----------
        X : array-like of shape (n_queries, n_features)
            Samples to classify.

        Returns
        -------
        y_pred : ndarray of shape (n_queries,)
            Label of the nearest training sample for each row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        ValueError
            If ``X`` fails validation or its number of features differs
            from the training data.
        """
        check_is_fitted(self, ['X_', 'y_'])
        X = check_array(X)

        # Fail early with an explicit message instead of relying on the
        # distance computation to reject mismatched widths.
        n_features = self.X_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                f'X has {X.shape[1]} features, but '
                f'{self.__class__.__name__} is expecting {n_features} '
                'features as input.'
            )

        # Row i of the distance matrix holds the distances from query i to
        # every training sample; argmin picks the first closest one on ties.
        closest = np.argmin(euclidean_distances(X, self.X_), axis=1)
        return self.y_[closest]

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load the Iris data as a (features, labels) pair.
X, y = load_iris(return_X_y=True)

# Stratify on the labels and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [5]:
# Build the classifier and fit it on the training split.
clf = NearestNeighborClassifier()
# Left as the cell's last expression so the fitted estimator's repr is shown.
clf.fit(X_train, y_train)

NearestNeighborClassifier()

In [6]:
# Score the fitted classifier on both splits.
train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)

# Report both numbers with four decimal places.
print(f'Train accuracy: {train_accuracy:.4f}')
print(f'Test accuracy: {test_accuracy:.4f}')

Train accuracy: 1.0000
Test accuracy: 0.9474
