In [1]:
import numpy as np


class KNN:
    def __init__(self, x_train, y_train):
        self.x_train = x_train
        self.y_train = y_train
        self.k = 1

    def predict(self, x_test):
        def euclidean(b):
            return np.sum(np.sqrt(np.power(x_test - b[0], 2)))

        sorted_x_train, sorted_y_train = zip(*sorted(zip(self.x_train, self.y_train), key=euclidean))
        chosen_k = sorted_y_train[0:self.k]
        return max(chosen_k, key=chosen_k.count)

    def accuracy(self, x_test, y_test, k):
        self.k = k
        y_hat = list(map(self.predict, x_test))
        true_no = np.count_nonzero(y_test == y_hat)
        return true_no


In [2]:
import pandas as pd
import numpy as np


def read_data(file_name):
    data = pd.read_csv(file_name)
    x = np.array(
        [data['x0'], data['x1'], data['x2'], data['x3'], data['x4'], data['x5'],
         data['x6'], data['x7']]).T
    y = np.array(data['y'])
    return x, y

In [3]:
import matplotlib.pyplot as plt
if __name__ == '__main__':
    x_train, y_train = read_data("TrainData.txt")
    x_test, y_test = read_data("TestData.txt")

    clf = KNN(x_train, y_train)
    accuracies = []
    for i in range(1, 10):
        print("k value:", i)
        correctly_classified = clf.accuracy(x_test, y_test, i)
        accuracy = correctly_classified / len(y_test)
        accuracies.append(accuracy)
        print("Number of correctly classified instances:", correctly_classified)
        print("Number of instances:", len(y_test))
        print("Accuracy:", accuracy)
        print("\n\n")


k value: 1


Number of correctly classified instances: 227
Number of instances: 445
Accuracy: 0.5101123595505618



k value: 2


Number of correctly classified instances: 227
Number of instances: 445
Accuracy: 0.5101123595505618



k value: 3


Number of correctly classified instances: 233
Number of instances: 445
Accuracy: 0.5235955056179775



k value: 4


Number of correctly classified instances: 251
Number of instances: 445
Accuracy: 0.5640449438202247



k value: 5


Number of correctly classified instances: 250
Number of instances: 445
Accuracy: 0.5617977528089888



k value: 6


Number of correctly classified instances: 256
Number of instances: 445
Accuracy: 0.5752808988764045



k value: 7


Number of correctly classified instances: 247
Number of instances: 445
Accuracy: 0.5550561797752809



k value: 8


Number of correctly classified instances: 254
Number of instances: 445
Accuracy: 0.5707865168539326



k value: 9


Number of correctly classified instances: 249
Number of instances: 445
Accuracy: 0.5595505617977528





In [4]:
plt.plot(accuracies)
plt.show()