# KNN Using NumPy and Brute Force

In [1]:
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float arrays before subtracting, so plain
    Python sequences are accepted and narrow or unsigned integer dtypes
    (e.g. ``uint8``) cannot wrap around during the subtraction or squaring.

    Args:
        x1: Array-like coordinates of the first point.
        x2: Array-like coordinates of the second point; must be
            broadcast-compatible with ``x1``.

    Returns:
        The square root of the sum of squared element-wise differences.
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))
    
class KNN:
    """Brute-force k-nearest-neighbours classifier built on NumPy.

    ``fit`` memorises the training data; ``predict`` labels each query point
    with the most common label among its ``k`` closest training samples,
    measured by Euclidean distance.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among ``k`` neighbours.

        Args:
            k: Number of nearest neighbours consulted per prediction.

        Raises:
            ValueError: If ``k`` is not a positive integer.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Array-like of training samples, one sample per entry along
                the first axis. Each sample is flattened to a feature vector.
            y: Array-like of labels, one per training sample.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Float conversion accepts plain lists and keeps narrow/unsigned
        # integer dtypes from wrapping around in the distance computation.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim == 0 or X.shape[0] == 0:
            raise ValueError("X must contain at least one training sample")
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        # One row per sample so distances reduce along a single axis.
        self.X_train = X.reshape(X.shape[0], -1)
        self.y_train = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Args:
            X: Iterable of query samples with the same feature layout as the
                training samples.

        Returns:
            A NumPy array holding one predicted label per query sample.
        """
        return np.array([self._predict(x) for x in X])

    def _predict(self, x):
        """Return the majority label among the ``k`` nearest training samples.

        If ``k`` exceeds the number of training samples, all of them vote.
        Tied vote counts go to the label seen first when the neighbours are
        listed from nearest to farthest.
        """
        query = np.asarray(x, dtype=float).reshape(-1)

        # Euclidean distance from the query to every training sample at once.
        distances = np.sqrt(np.sum((self.X_train - query) ** 2, axis=1))

        # Indices of the k closest training samples, nearest first.
        k_indices = np.argsort(distances)[: self.k]
        k_nearest_labels = self.y_train[k_indices]

        # Majority vote over the neighbours' labels.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Red/green/blue palette used by the optional scatter plot below.
cmap = ListedColormap(['#FF0000','#00FF00','#0000FF'])

# Load the iris samples and their class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1234
)

# Optional inspection of the training data (this is a 3-class problem):
# print(X_train.shape)
# print(X_train[0])
# print(y_train.shape)
# print(y_train)

# Optional scatter plot of the first two features, coloured by class:
# plt.figure()
# plt.scatter(X[:,0],X[:,1], c=y, cmap=cmap,edgecolor='k',s=20)
# plt.show()

# Train and evaluate a model that votes among the 3 nearest neighbours.
knn = KNN(k=3)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)

# Accuracy = fraction of test samples whose prediction matches the label.
acc = np.mean(predictions == y_test)
print(acc)

1.0


In [3]:
# Repeat the evaluation with a model that votes among the 5 nearest neighbours.
knn = KNN(k=5)
knn.fit(X_train, y_train)
predictions = knn.predict(X_test)

# Accuracy = fraction of test samples whose prediction matches the label.
acc = np.mean(predictions == y_test)
print(acc)

0.9666666666666667


In [4]:
# Counter demo: find the most frequent element of a list.
from collections import Counter

a = [1, 1, 1, 2, 2, 2, 2, 3, 4, 5, 6]
most_common = Counter(a).most_common(1)
# most_common(1) yields a one-element list of (value, count) pairs; 2 occurs
# 4 times here, so the result is [(2, 4)].
print(most_common)

[(2, 4)]
