class KNNClassifier(object):
    """K-Nearest Neighbors classifier.

    The model memorises the training samples and, for every query sample,
    returns the label that occurs most often among the `n_neighbors`
    closest training samples (Euclidean distance).

    Ties are resolved deterministically:

    * equal distances keep the order of the training set (stable sort);
    * equal vote counts go to the label whose first vote comes from the
      nearest neighbor.
    """

    def __init__(self, n_neighbors=3):
        """Create the classifier.

        Args:
            n_neighbors: number of nearest training samples that vote for
                the predicted label. Validated when `predict` is called.
        """
        self.n_neighbors = n_neighbors

        # Training data memorised by `fit`; `None` until the model is fitted.
        self.X_fit = None
        self.y_fit = None

    def euclidian(self, a, b):
        """Compute the Euclidean distance between two samples."""
        return np.linalg.norm(np.asarray(a) - np.asarray(b))

    def fit(self, X, y):
        """Train the model by storing the training set.

        Args:
            X: training samples, one sample per entry along the first axis.
            y: training labels, either a flat sequence of labels or a
                column with one label per row.

        Raises:
            ValueError: if `X` and `y` do not hold the same number of
                samples, or if the training set is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Pairing samples and labels of different lengths would silently
        # discard the surplus entries, so reject the mismatch up front.
        if len(X) != len(y):
            raise ValueError(
                "X and y must contain the same number of samples "
                "(got {} and {})".format(len(X), len(y))
            )
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")

        self.X_fit = X
        self.y_fit = y

    def predict(self, X):
        """Predict a target given features.

        Args:
            X: query samples, one sample per entry along the first axis.

        Returns:
            A column array of shape `(len(X), 1)` holding one predicted
            label per query sample.

        Raises:
            NotFitted: if `fit` has not been called.
            ValueError: if `n_neighbors` is smaller than 1.
        """
        if self.X_fit is None or self.y_fit is None:
            raise NotFitted("the model must be fitted before usage")

        n_neighbors = self.n_neighbors
        if n_neighbors < 1:
            raise ValueError(
                "n_neighbors must be at least 1 (got {})".format(n_neighbors)
            )

        X_fit = np.asarray(self.X_fit)
        y_fit = np.asarray(self.y_fit)
        # One label per training sample: the value itself for flat targets,
        # the first column for column-shaped targets.
        if y_fit.ndim > 1:
            y_fit = y_fit.reshape(len(y_fit), -1)[:, 0]

        labels = []
        for x in np.asarray(X):
            distances = np.array(
                [self.euclidian(x, x_fit) for x_fit in X_fit]
            )
            # A stable sort keeps training order among equidistant samples.
            # Slicing past the end is harmless, so asking for more neighbors
            # than there are training samples uses all of them.
            nearest = np.argsort(distances, kind="stable")[:n_neighbors]
            # `nearest` is ordered from closest to farthest, so on equal
            # counts `most_common` returns the label seen first, i.e. the
            # one backed by the closest neighbor.
            votes = Counter(y_fit[nearest])
            labels.append(votes.most_common(1)[0][0])
        return np.array(labels).reshape(-1, 1)

    def score(self, X, y):
        """Score of the model.

        Args:
            X: samples to classify.
            y: expected labels for `X`.

        Returns:
            The value computed by `accuracy_score` from the predictions
            for `X` and the expected labels `y`.

        Raises:
            NotFitted: if `fit` has not been called.
        """
        if self.X_fit is None or self.y_fit is None:
            raise NotFitted("the model must be fitted before usage")
        return accuracy_score(self.predict(X), y)