In [1]:
#Importing needed libraries
import numpy as np
from sklearn.datasets import load_wine
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score

In [2]:
#Getting the feature matrix and target vector from wine data set.
X = load_wine()['data']
y = load_wine()['target']

In [3]:
#Splitting the data in train and test with train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

## KNN implimentation from scratch

In [4]:
class KNN():
    #Define a KNN class to store our relevant functions in
    def __init__(self, k):
        '''
        The constructor of the KNN model.
            :param k: int 
                The number of nearest neighbors to find for each sample. 
        '''
        #Setting up the hyperparameters.
        self.k = k
        
    def fit( self, X, y):
        '''
        The fit function of the model.
        : param X : 2-D numpy.ndarray
            The X matrix with the features.
        : param y : 1-D numpy.ndarray
            The target vector.
        '''
        self.X = X
        self.y = y
        return self
    
    def predict(self, X):
        '''
        The predict function of the model.
        : param X : 2-D numpy.ndarray
            The X_test matrix with the features which we want to predict.
        '''
        #A list in witch we will store our predictions in.
        predictions = []
        
        #We will iterate through each point from our dataset
        for p in X:
            #A list that will contain the index and distance
            neighbours = []
            #This list will contain the labels of first k-neighbours
            labels = []
            
            for i in range(len(self.X)):
                #Getting the euclidian distances for our point and storing it in neighbours list
                dist = np.sqrt(sum(np.square(self.X[i] - p)))
                neighbours.append([i, dist])
            #Sorting our list to find the minimum distances
            neighbours = sorted(neighbours, key = lambda x : x[1])
            
            #Getting the first k minimum distances and storing into neighbours list
            neighbours = neighbours[ : self.k]
            
            #Iterating through neighbours list and adding their corresponding label
            for j in neighbours:
                labels.append(self.y[j[0]])
            #Using mean and then round function to get the most common label in labels list and adding 
            #them to predictions list
            predictions.append(int(np.array(labels).mean().round()))
        #Returning final predictions as an np.array
        return np.asarray(predictions)

In [5]:
#Instantiating our model
model = KNN(3)

In [6]:
#Using fit function on X_train features matrix and y_train target vector
model.fit(X_train, y_train)

<__main__.KNN at 0x16c745fca48>

In [7]:
#Using predict function on X_test feature matrix
y_pred = model.predict(X_test)

In [8]:
#Visualizing accuracy score
accuracy_score(y_test, y_pred)

0.7592592592592593

In [9]:
#Visualizing the confusion matrix
confusion_matrix(y_test, y_pred)

array([[17,  0,  2],
       [ 0, 19,  2],
       [ 0,  9,  5]], dtype=int64)

## KNN implimentation from Sklearn library

In [10]:
#Creating and fitting the model
knn = KNeighborsClassifier(n_neighbors = 5)
knn.fit(X_train, y_train)

KNeighborsClassifier()

In [11]:
#Making predictions
y_pred = knn.predict(X_test)

In [12]:
#Visualizing the confusion matrix
confusion_matrix(y_test, y_pred)

array([[17,  0,  2],
       [ 1, 15,  5],
       [ 1,  5,  8]], dtype=int64)

In [13]:
#Visualizing accuracy score
accuracy_score(y_test, y_pred)

0.7407407407407407

#### Surprisingly we got a better accuracy score on our implimentation from scratch