In [20]:
import csv
import numpy as np
from scipy import stats

In [21]:
def knn(X, Y, k):
    """
    knn calculates accuracy of leave-one-out cross validation for kNN.

    Every observation is classified by the most frequent label among its k
    nearest *other* observations (squared Euclidean distance). The observation
    itself is explicitly excluded from its own neighbour list.

    Inputs: X is N-by-m data matrix (numpy array) with N observations
              (one per row) and m dimensions.
            Y is vector with N elements (class labels).
            k is number of neighbours to use (positive integer, Python or
              numpy); if k is larger than N - 1, all other observations vote.
    Output: acc is fraction of correctly recognised cases.
    Raises: Exception if X is not a 2-D numpy array, if Y does not have one
            element per row of X, or if k is not a positive integer.
    """
    # Accept numpy integer scalars as well as Python ints, but not booleans.
    k_is_int = isinstance(k, (int, np.integer)) and not isinstance(k, bool)
    if not isinstance(X, np.ndarray) or X.ndim != 2 or X.shape[0] != len(Y) \
            or not k_is_int or k <= 0:
        raise Exception("Wrong types of parameters:\n" +
                        "X must be numerical matrix\n" +
                        'Y must be vector with the same nuber elements as number ' +
                        'of rows in matrix X\n' +
                        'k must be positive integer')

    # Work in floating point so that integer inputs cannot overflow when
    # squared, and make sure the labels support array indexing.
    points = np.asarray(X, dtype=float)
    labels = np.asarray(Y)
    n_points = points.shape[0]

    # Calculate distances between all rows of matrix X.
    # Firstly calculate squared length of each data vector (as a column).
    sq_len = np.sum(points ** 2, axis=1)[:, np.newaxis]
    # Now use formula ||x-y||^2 = ||x||^2 - 2(x, y) + ||y||^2
    dist = sq_len + sq_len.T - 2 * np.matmul(points, points.T)

    # Leave-one-out: a point must never be its own neighbour. Relying on the
    # point sorting first in its own row is unsafe when the data contains
    # duplicate rows or when round-off makes the self-distance non-minimal,
    # so push the diagonal to infinity instead.
    np.fill_diagonal(dist, np.inf)

    # Sort each row of the distance matrix and keep the k closest others.
    n_neighbours = min(int(k), max(n_points - 1, 0))
    ind = np.argsort(dist, axis=1)[:, :n_neighbours]

    # Get the labels of the neighbours and take the mode of each row.
    # reshape(-1) copes with scipy versions that keep or drop the reduced axis.
    res = np.asarray(stats.mode(labels[ind], axis=1)[0]).reshape(-1)
    return np.sum(res == labels) / len(labels)

In [22]:
# Load data. Every CSV row is treated as one observation: the first column is
# discarded, the second column holds the class label and the remaining
# columns are the numeric features.
# newline='' is the way the csv module documentation asks files to be opened.
with open('Normalized Data - Copy.csv', newline='') as csv_file:
    # Read all data to list of lists
    rows = list(csv.reader(csv_file, delimiter=','))

# Number of objects
N = len(rows)
# Array of labels: 1.0 where the label is 'M', 0.0 otherwise
lab = np.array([row[1] == 'M' for row in rows], dtype=float)
# Feature matrix: everything after the first two columns, converted to float
data = np.array([row[2:] for row in rows]).astype("float")

# Drop the temporaries. Unlike deleting a loop variable, this cannot fail
# when the file contains no rows.
del rows, csv_file

# Call knn function: leave-one-out accuracy for k = 1 and k = 3
acc1 = knn(data, lab, 1)
acc3 = knn(data, lab, 3)
print(acc1,acc3)

0.9507908611599297 0.9648506151142355
