In [189]:
import pandas as pd
import numpy as np
from sklearn import datasets
from collections import Counter
# Load the iris bunch and build one DataFrame holding the feature columns
# (named after iris.feature_names) plus the class labels in a 'target' column.
iris = datasets.load_iris()
irisDf = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
).assign(target=iris.target)


In [190]:
def Train_Test_Split(dataset, target, test_size = 0, random_state=None):
    """Shuffle a feature table together with its labels and split both in two.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature columns, one row per sample. The frame is NOT modified.
    target : pandas.Series or array-like
        Labels for the rows of ``dataset``. They are attached as a temporary
        ``'target'`` column, so a Series is aligned on the index while a plain
        sequence is matched by position (normal pandas column semantics).
    test_size : float, default 0
        Fraction of the rows, between 0 and 1 inclusive, that goes to the test
        part. The resulting row count is truncated towards zero.
    random_state : int or None, default None
        Seed forwarded to ``DataFrame.sample`` so the shuffle is repeatable.

    Returns
    -------
    tuple
        ``(train_x, test_x, train_y, test_y)`` where the ``*_x`` values are
        DataFrames without the ``'target'`` column and the ``*_y`` values are
        the matching ``'target'`` Series.

    Raises
    ------
    ValueError
        If ``test_size`` lies outside ``[0, 1]``.
    """
    if not 0 <= test_size <= 1:
        raise ValueError(
            "test_size must be a fraction between 0 and 1, got {!r}".format(test_size)
        )

    # assign() builds a new frame, so the caller's DataFrame never gains (or
    # has overwritten) a 'target' column as a side effect of splitting.
    labelled = dataset.assign(target=target)
    shuffled_data = labelled.sample(frac=1, random_state=random_state)

    n_test = int(test_size * len(labelled))
    n_train = len(labelled) - n_test

    # .iloc makes the positional intent explicit regardless of the index type.
    training_dataset = shuffled_data.iloc[:n_train]
    test_dataset = shuffled_data.iloc[n_train:]

    train_x = training_dataset.drop(columns='target')
    train_y = training_dataset['target']
    test_x = test_dataset.drop(columns='target')
    test_y = test_dataset['target']

    return train_x, test_x, train_y, test_y

In [191]:
# Hold out 20% of the iris rows for testing; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = Train_Test_Split(
    irisDf.drop(columns='target'),
    irisDf['target'],
    test_size=0.2,
    random_state=10,
)

In [192]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Training data is memorised by ``fit``; ``predict`` labels each query row
    with the most common label among its ``k`` closest training rows.
    """

    def __init__(self, k):
        """Create a classifier that votes among ``k`` neighbours.

        Parameters
        ----------
        k : int
            Number of neighbours consulted per prediction; must be >= 1.

        Raises
        ------
        ValueError
            If ``k`` is not a positive integer.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = k

    def fit(self, x, t):
        """Memorise the training samples and their labels.

        Parameters
        ----------
        x : pandas.DataFrame or 2-D array-like
            Training features, one row per sample.
        t : pandas.Series or 1-D array-like
            Labels, matched to the rows of ``x`` by position.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D, is empty, or its row count differs from ``t``.
        """
        features = np.asarray(x, dtype=float)
        labels = np.asarray(t)
        if features.ndim != 2:
            raise ValueError("x must be 2-dimensional (samples x features)")
        if len(features) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(features) != len(labels):
            raise ValueError("x and t must contain the same number of samples")

        # Column labels are remembered so DataFrame queries can be aligned by
        # name, as the label-based Series arithmetic used previously did.
        self._columns = list(x.columns) if isinstance(x, pd.DataFrame) else None
        self._xtrain = features
        self._ytrain = labels

    def find_class(self, knearest):
        """Return the most frequent label in ``knearest``.

        Ties are resolved in favour of the label encountered first, because
        ``Counter.most_common`` keeps first-seen order for equal counts.
        """
        label, _count = Counter(knearest).most_common(1)[0]
        return label

    def predict(self, xtest):
        """Predict a label for every row of ``xtest``.

        Parameters
        ----------
        xtest : pandas.DataFrame or 2-D array-like
            Query samples. A DataFrame is re-ordered to the training column
            order when the model was fitted on a DataFrame; extra columns are
            ignored.

        Returns
        -------
        numpy.ndarray
            One predicted label per query row (empty for empty input).

        Raises
        ------
        AttributeError
            If called before ``fit``.
        KeyError
            If a DataFrame query lacks one of the training columns.
        ValueError
            If the query feature count does not match the training data.
        """
        if getattr(self, "_xtrain", None) is None:
            raise AttributeError("KNN instance is not fitted yet; call fit() first")

        if self._columns is not None and isinstance(xtest, pd.DataFrame):
            xtest = xtest[self._columns]

        queries = np.asarray(xtest, dtype=float)
        if len(queries) == 0:
            return np.array([])
        if queries.ndim != 2 or queries.shape[1] != self._xtrain.shape[1]:
            raise ValueError(
                "xtest must be 2-dimensional with {} feature(s)".format(self._xtrain.shape[1])
            )

        classes = []
        for query in queries:
            # Distances from this query to every training row in one numpy call.
            distances = np.sqrt(((self._xtrain - query) ** 2).sum(axis=1))
            # A stable sort makes equidistant neighbours resolve by training
            # order, so predictions are deterministic. If k exceeds the number
            # of training rows the slice simply uses all of them.
            k_nearest_indices = np.argsort(distances, kind="stable")[:self.k]
            classes.append(self.find_class(self._ytrain[k_nearest_indices]))

        return np.array(classes)

    def accuracy_score(self, true_class, predicted):
        """Return the fraction of positions where ``predicted`` equals ``true_class``.

        Returns ``0.0`` for empty input instead of dividing by zero.

        Raises
        ------
        ValueError
            If the two inputs differ in length.
        """
        total = len(true_class)
        if total != len(predicted):
            raise ValueError("true_class and predicted must have the same length")
        if total == 0:
            return 0.0
        hits = sum(1 for t, y in zip(true_class, predicted) if t == y)
        return hits / total

    def score(self, x, t):
        """Predict labels for ``x`` and return their accuracy against ``t``."""
        y = self.predict(x)
        return self.accuracy_score(t, y)

In [193]:
# Build a 5-neighbour classifier, memorise the training split, and label the held-out rows.
model = KNN(k=5)
model.fit(x=xtrain, t=ytrain)
predicted_classes = model.predict(xtest=xtest)

In [194]:
# Accuracy on the data the model memorised, then on the held-out rows.
train_score, test_score = (
    model.score(features, labels)
    for features, labels in ((xtrain, ytrain), (xtest, ytest))
)
print("Score on training data: ", train_score)
print("Score on testing data: ", test_score)

Score on training data:  0.9666666666666667
Score on testing data:  0.9666666666666667
