In [1]:
import numpy as np
from sklearn.datasets import load_iris, load_breast_cancer
from collections import Counter
from scipy.spatial.distance import euclidean


class KNN:
    """Minimal k-nearest-neighbours model for classification or regression.

    KNN is a lazy learner: ``fit`` only stores the training data, and
    ``predict`` compares a single query instance against every stored
    training instance to find its ``k`` closest neighbours.
    """

    def __init__(self, k=3, is_classification=True, distance_name='Euclidean'):
        """Configure the model.

        Args:
            k: Number of nearest neighbours consulted for each prediction.
            is_classification: If True, ``predict`` returns the most common
                label among the neighbours; otherwise it returns the mean of
                the neighbours' target values.
            distance_name: Metric used to rank neighbours; 'Euclidean' or
                'Manhattan'.
        """
        self.k = k
        self.distance_name = distance_name
        self.is_classification = is_classification

        # Training data, populated by fit(); None means "not fitted yet".
        self.X = None
        self.y = None

    def _compute_distance(self, x1, x2, distance_name='Euclidean'):
        """Return the distance between two instances.

        Args:
            x1: First instance (1-D sequence of numbers).
            x2: Second instance, same length as ``x1``.
            distance_name: 'Euclidean' (default) or 'Manhattan'.

        Returns:
            The distance between ``x1`` and ``x2`` under the chosen metric.

        Raises:
            ValueError: If ``distance_name`` is not a supported metric.
        """
        if distance_name == 'Euclidean':
            return euclidean(x1, x2)
        if distance_name == 'Manhattan':
            # Sum of absolute coordinate differences (L1 norm).
            return np.sum(np.abs(np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)))
        # Previously an unknown name fell through to an UnboundLocalError.
        raise ValueError(
            "Unsupported distance_name %r; expected 'Euclidean' or 'Manhattan'" % (distance_name,)
        )

    def fit(self, X, y):
        """Store the training data; KNN has no training step.

        Args:
            X: Training instances, one row per instance.
            y: Targets (class labels or regression values), one per row of X.

        Raises:
            ValueError: If ``X`` and ``y`` have different lengths.
        """
        # Convert to arrays so rows are iterated and targets are indexed
        # positionally even when a DataFrame / Series is passed in.
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same length, got %d and %d" % (len(X), len(y))
            )
        self.X = X
        self.y = y

    def predict(self, x):
        """Predict the target of a single instance.

        Args:
            x: One instance with the same number of features as the rows
                passed to ``fit``.

        Returns:
            For classification, the most common label among the ``k`` nearest
            training instances (on a tie, the label encountered first among
            those neighbours). For regression, the mean of their target values.

        Raises:
            ValueError: If the model has not been fitted, the training set is
                empty, ``k`` is smaller than 1, or the configured metric is
                unsupported.
        """
        if self.X is None or self.y is None:
            raise ValueError("KNN.predict called before fit")
        if len(self.X) == 0:
            raise ValueError("cannot predict from an empty training set")
        if self.k < 1:
            # A non-positive k would slice the wrong (or no) neighbours.
            raise ValueError("k must be at least 1, got %r" % (self.k,))

        # Step 1: distance from x to every training instance, using the
        # metric chosen at construction time.
        distances = [
            self._compute_distance(x, training_instance, self.distance_name)
            for training_instance in self.X
        ]

        # Step 2: labels of the k closest instances (argsort yields the
        # training indexes ordered from nearest to farthest).
        nearest_indexes = np.argsort(distances)[:self.k]
        label_of_neighs = [self.y[idx] for idx in nearest_indexes]

        # Step 3: aggregate the neighbours into a prediction.
        if self.is_classification:
            return Counter(label_of_neighs).most_common(1)[0][0]
        # axis=0 averages across neighbours, so 1-D targets give a scalar.
        return np.mean(label_of_neighs, axis=0)
        
        

In [2]:
# Load scikit-learn's bundled breast-cancer dataset and split it into
# the feature matrix and the target vector.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Classifier that votes among the 3 nearest neighbours.
knn = KNN(k=3)

In [7]:
# Display the training feature matrix produced by the split.
X_train

array([[1.546e+01, 1.189e+01, 1.025e+02, ..., 1.827e-01, 3.216e-01,
        1.010e-01],
       [1.285e+01, 2.137e+01, 8.263e+01, ..., 5.601e-02, 2.488e-01,
        8.151e-02],
       [1.921e+01, 1.857e+01, 1.255e+02, ..., 2.091e-01, 3.537e-01,
        8.294e-02],
       ...,
       [1.429e+01, 1.682e+01, 9.030e+01, ..., 3.333e-02, 2.458e-01,
        6.120e-02],
       [1.398e+01, 1.962e+01, 9.112e+01, ..., 1.827e-01, 3.179e-01,
        1.055e-01],
       [1.218e+01, 2.052e+01, 7.722e+01, ..., 7.431e-02, 2.694e-01,
        6.878e-02]])

In [4]:
# "Fitting" KNN only stores the training split on the model; no training happens.
knn.fit(X_train, y_train)

In [5]:

# Sanity check: distance between the first two training rows (Euclidean by default).
knn._compute_distance(X_train[0], X_train[1])

509.2318808536851

In [9]:
knn.predict(X_train[0])  # sanity check: predict the label of the first training row

0

In [10]:
# KNN.predict handles one instance at a time, so walk the test set row by row
# and collect the predicted label for each instance.
y_pred = []
for instance in X_test:
    prediction = knn.predict(instance)
    y_pred.append(prediction)

# Per-class precision / recall / F1 against the held-out labels.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.91      0.93      0.92        67
           1       0.96      0.95      0.95       121

    accuracy                           0.94       188
   macro avg       0.94      0.94      0.94       188
weighted avg       0.94      0.94      0.94       188

