In [1]:
import warnings
warnings.simplefilter('ignore')

import numpy as np
import pandas as pd
import sklearn

from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

#### Assignment 1

Write a KNN_classifier class suitable for solving a multiclass classification problem. Implement .fit() and .predict() methods. 
Note that you may use only the NumPy library.

In [2]:
class KNN_classifier:
    """k-nearest-neighbours classifier for multiclass problems, built on NumPy only.

    A query point receives the most frequent label among its ``n_neighbors``
    closest training points (Euclidean distance). If several labels are tied
    for the most votes, the first one in ``np.unique`` order (i.e. the smallest
    label) wins. Training points at exactly the same distance are ranked by
    their row position in the training set.
    """

    def __init__(self, n_neighbors: int):
        """Store the neighbourhood size.

        Args:
            n_neighbors: number of nearest training points that vote (K >= 1).

        Raises:
            TypeError: if ``n_neighbors`` is not an integer.
            ValueError: if ``n_neighbors`` is smaller than 1.
        """
        if not isinstance(n_neighbors, (int, np.integer)):
            raise TypeError(
                f"n_neighbors must be an integer, got {type(n_neighbors).__name__}"
            )
        # K = 0 leaves no voters, and a negative K would silently be
        # interpreted by slicing as "all but the last |K| neighbours".
        if n_neighbors < 1:
            raise ValueError(f"n_neighbors must be >= 1, got {n_neighbors}")
        self.K = int(n_neighbors)

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Memorise the training set (KNN has no real training step).

        Args:
            X: training features, shape (n_samples, n_features).
            y: training labels, one per row of ``X``.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if ``X`` is not 2-D, if ``X`` and ``y`` disagree on the
                number of samples, or if there are fewer samples than ``K``.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim} dimension(s)"
            )
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples")
        # Otherwise the vote would silently run over fewer than K neighbours.
        if self.K > X.shape[0]:
            raise ValueError(
                f"n_neighbors={self.K} exceeds the number of training samples "
                f"({X.shape[0]})"
            )

        self.X = X
        self.y = y
        return self

    def predict(self, x: np.ndarray):
        """Predict a label for every query point.

        Args:
            x: query features, shape (n_queries, n_features); a single 1-D
                sample of shape (n_features,) is also accepted.

        Returns:
            1-D array of predicted labels with the dtype of the training labels.

        Raises:
            ValueError: if the number of features differs from the training data.
        """
        x = np.atleast_2d(np.asarray(x))
        if x.shape[1] != self.X.shape[1]:
            raise ValueError(
                f"x has {x.shape[1]} feature(s), but the model was fitted "
                f"with {self.X.shape[1]}"
            )

        # Work in floating point so unsigned-integer features cannot wrap
        # around when subtracted.
        train = self.X.astype(float, copy=False)
        queries = x.astype(float, copy=False)

        predictions = np.empty(queries.shape[0], dtype=self.y.dtype)

        for j, query in enumerate(queries):
            # Euclidean distance from this query to every training row at once.
            dist = np.linalg.norm(train - query, axis=1)
            # Stable sort keeps equidistant points in training-set order.
            nearest_ind = np.argsort(dist, kind="stable")[:self.K]

            values, counts = np.unique(self.y[nearest_ind], return_counts=True)
            predictions[j] = values[np.argmax(counts)]

        return predictions

#### Assignment 2

1. Train the model on the given dataset with two predictors, X and Y ('data.csv'). Predict the class of the object with predictors (54, 68).
2. Use the KNeighborsClassifier implemented in sklearn to find the 3 closest neighbors of x = (54, 68) and the distances to them.

In [3]:
# Load the two-predictor dataset; the path is relative to the notebook's working directory.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV
# was written together with its index — consider index_col=0 if that is the case.
data = pd.read_csv('data.csv')
# Preview the first two rows to check the column layout.
data.head(2)

Unnamed: 0,id,X,Y,Class
0,1,30,96,0
1,2,21,73,0


In [4]:
# Feature matrix: one row per object, columns are the predictors X and Y.
X = np.column_stack((data['X'], data['Y']))
# Class labels, aligned with the rows of X.
y = data['Class'].to_numpy(copy=True)
# Query point given in the assignment statement.
z = np.array((54, 68))

In [5]:
# Fit the hand-written classifier with K = 3; fit() returns the instance, so the call can be chained.
model = KNN_classifier(n_neighbors=3).fit(X, y)

In [6]:
# predict() expects a 2-D batch, so the single query is given a leading axis of length 1.
class_prediction = model.predict(z[np.newaxis, :])
print(f'Object x = {z} belongs to the class {class_prediction[0]}')

Object x = [54 68] belongs to the class 1


In [7]:
# Reference implementation from scikit-learn, fitted on the same data.
# Note: this rebinds `model`, replacing the hand-written classifier bound above.
model = KNeighborsClassifier(n_neighbors=3).fit(X, y)
# Ask for the 3 nearest training rows of the query: distances first, row indices second.
dist, neighs = model.kneighbors(z[np.newaxis, :], n_neighbors=3, return_distance=True)

In [8]:
# Row 0 holds the results for the single query point.
# The + 1 turns 0-based row positions into 1-based IDs; this assumes the `id`
# column equals row position + 1 (true for the two rows previewed) — TODO confirm.
print(f'ID of 3 nearest neighbours of x = {z}: {neighs[0] + 1}')
print('Distance is', dist[0].round(3))

ID of 3 nearest neighbours of x = [54 68]: [ 6  5 10]
Distance is [14.56  21.401 24.352]


#### Assignment 3

Train the model on the load_breast_cancer dataset. Use K = 8. Split the sample into training and test sets using the train_test_split function from the sklearn.model_selection module so that 30% of the objects fall into the test set.

Estimate the classification accuracy using accuracy_score. Apply the KNN method implemented in sklearn and compare the accuracies.

In [9]:
# Seed NumPy's global random number generator.
# NOTE(review): random_seed is not passed to train_test_split below; the split is
# pinned by its own random_state=42 — confirm which seed is intended.
random_seed = 4238
np.random.seed(random_seed)

# X and y are rebound here: from now on they hold the breast-cancer features and labels.
X, y = load_breast_cancer(return_X_y=True)
# Hold out 30% of the samples for testing (test_size=0.3).
# NOTE(review): `x_test` is lower-case while `X_train` is upper-case; later cells use these exact names.
X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=42)

In [10]:
# Reference model: scikit-learn KNN with K = 8 and p = 2.
model_1 = KNeighborsClassifier(n_neighbors=8, p=2).fit(X_train, y_train)
predictions_1 = model_1.predict(x_test)

# Share of test objects whose predicted label matches the true one.
acc_1 = accuracy_score(y_true=y_test, y_pred=predictions_1)
print('Accuracy of the classification using sklearn:', round(acc_1, 5))

Accuracy of the classification using sklearn: 0.96491


In [11]:
# Hand-written KNN with the same K = 8, fitted on the same training split.
model_2 = KNN_classifier(n_neighbors=8).fit(X_train, y_train)
predictions_2 = model_2.predict(x_test)

# Scored the same way as the scikit-learn model so the two numbers are comparable.
acc_2 = accuracy_score(y_true=y_test, y_pred=predictions_2)
print('Accuracy of the classification using own implementation:', round(acc_2, 5))

Accuracy of the classification using own implementation: 0.96491


Accuracy comparison:

In [12]:
# Accuracy gap (sklearn minus own implementation); 0.0 means both scored the same on this test split.
acc_1 - acc_2

0.0