In [1]:
from sklearn.datasets import make_blobs 
import numpy as np
import matplotlib.pyplot as plt



class CustomKNN:
    """Minimal k-nearest-neighbours classifier using squared Euclidean distance.

    Training simply memorises the samples. A prediction is the most frequent
    label among the ``k`` training samples closest to the query point. When
    several labels are equally frequent, the one that sorts first wins
    (``np.unique`` returns labels in sorted order and ``argmax`` picks the
    first maximum).
    """

    def __init__(self, k):
        """Store the neighbourhood size.

        Parameters
        ----------
        k : int
            Number of nearest training samples that vote on each prediction.
            If ``k`` exceeds the number of training samples, every training
            sample votes.

        Raises
        ------
        ValueError
            If ``k`` is not an integer greater than or equal to 1.
        """
        # k == 0 would leave no voters and a negative k would silently drop
        # neighbours through slice semantics, so reject both up front.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Numeric feature matrix. It is converted to a float array so that
            lists and object-dtype arrays are handled with vectorised maths.
        y : array-like with ``n_samples`` entries along its first axis
            Class labels; any dtype that ``np.unique`` can sort.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, is empty, or its number of rows
            differs from the number of labels.
        """
        features = np.asarray(X, dtype=float)
        labels = np.asarray(y)

        if features.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={features.ndim}"
            )
        if features.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if labels.ndim == 0 or labels.shape[0] != features.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        self.X = features
        self.y = labels

    def predict_point(self, point):
        """Predict the label of a single sample.

        Parameters
        ----------
        point : array-like of shape (n_features,)
            Feature vector to classify.

        Returns
        -------
        The majority label among the ``k`` nearest training samples.
        """
        query = np.asarray(point, dtype=float)
        # Squared distance is enough: the square root would not change the ranking.
        sq_dist = np.square(self.X - query).sum(axis=1)
        # A stable sort makes the choice among equidistant samples deterministic
        # (earlier training samples win).
        nearest = np.argsort(sq_dist, kind="stable")[: self.k]
        labels, counts = np.unique(self.y[nearest], return_counts=True)
        return labels[counts.argmax()]

    def predict(self, X):
        """Predict a label for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            One predicted label per row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        samples = np.asarray(X, dtype=float)
        if samples.size == 0:
            return np.array([])
        if samples.ndim != 2:
            # Iterating a 1-D array would feed scalars to predict_point, which
            # would broadcast against the training matrix and give meaningless labels.
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features), got ndim={samples.ndim}"
            )
        return np.array([self.predict_point(row) for row in samples])

    def score(self, X, y):
        """Return the accuracy of the predictions for ``X`` against ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.
        y : array-like of shape (n_samples,)
            True labels for ``X``.

        Returns
        -------
        Fraction of samples whose predicted label equals the true label.

        Raises
        ------
        ValueError
            If ``y`` does not have one label per predicted sample.
        """
        expected = np.asarray(y)
        predicted = self.predict(X)
        if expected.shape != predicted.shape:
            # Guard against silent broadcasting of mismatched label arrays.
            raise ValueError(
                f"y has shape {expected.shape} but predictions have shape {predicted.shape}"
            )
        return (expected == predicted).mean()

In [2]:
import pandas as pd

# Load the Iris table; the first CSV column becomes the row index.
df = pd.read_csv(
    "../datasets/Iris.csv",
    index_col=0,
)

In [3]:

data = df.values

# `data` is an object-dtype array because the frame mixes numeric columns with
# string labels. Cast the four feature columns to float so that distance
# computations run as vectorised numeric operations instead of per-element
# Python arithmetic.
X = data[:, :4].astype(float)
# The fifth column holds the class labels and is kept unchanged.
y = data[:, 4]


In [4]:

from sklearn.model_selection import train_test_split

# Hold out 33% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [5]:
# Classifier that lets the 5 nearest training samples vote on each prediction.
model = CustomKNN(k=5)

In [6]:
# Store the training split inside the classifier.
model.fit(X=X_train, y=y_train)


In [7]:
# True labels of the first 20 held-out samples, shown for comparison with the predictions.
y_test[:20]


array(['Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica'],
      dtype=object)

In [8]:
# Predicted labels for the first 20 held-out samples.
model.predict(X=X_test[:20])


array(['Iris-versicolor', 'Iris-setosa', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-setosa',
       'Iris-versicolor', 'Iris-virginica', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-virginica', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-virginica',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-virginica'],
      dtype='<U15')

In [9]:
# Accuracy on the full held-out split.
model.score(X=X_test, y=y_test)


0.98