In [45]:
import numpy as np

In [46]:
class KNN:
    """Brute-force k-nearest-neighbours model for regression and classification.

    The model only stores the training data; every prediction measures the
    Euclidean distance from the query point to each training example and
    combines the targets of the K closest ones.
    """

    X = None # features of training examples, numpy array of dimension m x n where m is number of examples and n is number of features
    y = None # target values or labels of training examples, one per example (length m or dimension m x 1)
    K = 5    # number of neighbours to consider for predictions

    def __init__(self, X_train, y_train, k = 5):
        """Store the training data and the neighbourhood size.

        X_train : features of training examples, dimension m x n
        y_train : target value or label of each training example (length m or m x 1)
        k       : number of neighbours to consider for predictions
        """
        self.X = X_train
        self.y = y_train
        self.K = k

    def distance(self, v1, v2):
        """Return the Euclidean distance between two feature vectors."""
        diff = np.array(v1) - np.array(v2)
        return np.sqrt(np.dot(diff, diff))

    def makeNeighbours(self, data):
        """Return the K training examples closest to one data point.

        data : features of the data point, array of length n

        Returns a numpy array with one [label or value, distance] row per
        neighbour, nearest first. There are fewer than K rows when the
        training set holds fewer than K examples. Labels and distances share
        one array, so NumPy converts them to a common dtype (integer labels
        come back as floats).
        """
        # Read the training data from the instance, never from module-level
        # names. Flattening the targets lets both (m,) and (m, 1) layouts work.
        labels = np.ravel(self.y)

        neighbours = [
            [labels[i], self.distance(self.X[i], data)]
            for i in range(len(self.X))
        ]

        # list.sort is stable, so equally distant examples keep training order.
        neighbours.sort(key = lambda pair: pair[1])

        return np.array(neighbours[:self.K])

    def regress(self, data):
        """Predict a continuous value for every row of data.

        data : features of the points to predict, dimension m x n

        Returns a numpy array of length m where each entry is the average of
        the targets of that point's nearest neighbours.
        """
        predictions = []

        for d in data:
            neighbours = self.makeNeighbours(d)
            # Column 0 of each neighbour row is the target value.
            total = sum((n[0] for n in neighbours), 0.0)
            predictions.append(total / len(neighbours))

        return np.array(predictions)

    def classify(self, data):
        """Predict a label for every row of data by majority vote.

        data : features of the points to predict, dimension m x n

        Returns a numpy array of length m holding the most frequent label
        among each point's nearest neighbours. When several labels are tied,
        the one encountered first (walking from the nearest neighbour
        outwards) wins.
        """
        predictions = []

        for d in data:
            counts = {}

            for n in self.makeNeighbours(d):
                counts[n[0]] = counts.get(n[0], 0) + 1

            # max() returns the first key holding the highest count.
            predictions.append(max(counts, key = counts.get))

        return np.array(predictions)

    def test(self, X_test, y_test, target_type = "cont"):
        """Return the root mean squared error of the model on testing data.

        X_test      : feature values for testing data, dimension m x n
        y_test      : target values for testing data (length m or m x 1)
        target_type : whether the target is discrete or continuous;
                      "cont" uses regress(), any other value uses classify()
        """
        if target_type == "cont":
            pred = self.regress(X_test)
        else:
            pred = self.classify(X_test)

        # Flatten the targets so an m x 1 column cannot broadcast the
        # difference into an m x m matrix.
        errors = pred - np.ravel(y_test)

        return (np.dot(errors, errors)/len(pred))**0.5


In [47]:
# Training set: two examples with five features each, labelled 0 and 1.
X = np.array([
    [1, 2, 3, 1, 2],
    [4, 5, 2, 6, 4],
])
y = np.array([0, 1])

# Points to predict, together with their expected targets.
X_test = np.array([
    [1, 1, 1, 1, 1],
    [5, 5, 5, 5, 5],
])
y_test = np.array([0, 1])

# k is larger than the training set, so both examples are always neighbours.
model = KNN(X_train=X, y_train=y, k=3)

# Report the RMSE in regression mode, then in classification mode.
print(model.test(X_test, y_test, target_type="cont"))
print(model.test(X_test, y_test, target_type="disc"))


0.5
0.0
