In [1]:
from csv import reader
from sys import exit
from math import sqrt
from operator import itemgetter

In [3]:
def load_data_set(filename):
    """Read a comma-separated data file into a list of rows.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    list of list of str
        One list of string fields for every non-blank line of the file.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist (propagated unchanged from ``open``).
    """
    with open(filename, newline='') as data_file:
        # csv.reader yields an empty list for each blank line (typically a
        # trailing newline at the end of the file).  Drop those rows so that
        # code indexing the last column of every row never sees an empty one.
        return [row for row in reader(data_file, delimiter=',') if row]


In [4]:
def convert_to_float(data_set, mode):
    """Convert the string fields of every row to floats.

    Parameters
    ----------
    data_set : iterable of sequence of str
        Rows as read from the data file.
    mode : str
        ``'training'`` converts every field except the last one, which is
        kept unchanged as the final element of the row; ``'test'`` converts
        every field.

    Returns
    -------
    list of list
        A new list with one converted row per input row.

    Notes
    -----
    An unknown ``mode`` or a field that cannot be parsed as a float prints a
    message and terminates through ``sys.exit`` (``SystemExit``).
    """
    if mode not in ('training', 'test'):
        print('Invalid mode, program will exit.')
        exit()

    try:
        if mode == 'training':
            # Features first, then the untouched trailing field.
            return [
                [float(field) for field in row[:-1]] + [row[-1]]
                for row in data_set
            ]
        return [[float(field) for field in row] for row in data_set]
    except ValueError as conversion_error:
        print(conversion_error)
        print('Invalid data set format, program will exit.')
        exit()


In [5]:
def get_classes(training_set):
    """Return the distinct class labels of the training data.

    Parameters
    ----------
    training_set : iterable of sequence
        Training rows; the last element of each row is taken as its label.

    Returns
    -------
    list
        Each label once, in order of first appearance in ``training_set``.
    """
    # dict.fromkeys removes duplicates while keeping insertion order.  A set
    # of strings has an order that can change from one interpreter run to the
    # next (hash randomisation), which would make any logic that depends on
    # the position of a class in this list non-reproducible.
    return list(dict.fromkeys(row[-1] for row in training_set))

In [7]:
def find_neighbors(distances, k):
    """Return the first ``k`` entries of ``distances``.

    The slice is taken as-is, so the entries are the nearest neighbours only
    if ``distances`` is already ordered by ascending distance.  If fewer than
    ``k`` entries exist, all of them are returned.
    """
    nearest = distances[:k]
    return nearest

In [8]:
def find_response(neighbors, classes):
    """Pick the class that receives the most votes among ``neighbors``.

    Parameters
    ----------
    neighbors : list of sequence
        Neighbour rows; the second-to-last element of each row is compared
        against the entries of ``classes``.
    classes : list
        Candidate class labels.

    Returns
    -------
    tuple
        ``(index, votes)``: the position in ``classes`` of the winning label
        and its vote count.  On a tie the earliest position wins.
    """
    # One tally per class, in the same order as ``classes``.
    tally = [
        sum(1 for neighbor in neighbors if neighbor[-2] == label)
        for label in classes
    ]
    # max() keeps the first maximum it encounters, so ties go to the lowest index.
    winner = max(range(len(tally)), key=tally.__getitem__)
    return winner, tally[winner]

In [9]:
def knn(training_set, test_set, k):
    """Classify each test sample by a vote among its k nearest training rows.

    For every sample in ``test_set`` the Euclidean distance to every training
    row is computed, the rows are ordered by that distance, the first ``k``
    are taken as neighbours and the class with the most votes is printed
    together with its vote count.

    Parameters
    ----------
    training_set : list of list
        Training rows: feature values followed by the class label.  Must not
        be empty (the first row determines the number of features).
    test_set : iterable of list of float
        Samples to classify; each must have exactly as many values as a
        training row has features.
    k : int
        Number of neighbours that vote.

    Returns
    -------
    None
        Predictions are printed.

    Notes
    -----
    Any error raised while processing the samples is printed and stops the
    processing of the remaining samples; it is not propagated to the caller.
    """
    # Number of feature columns: everything in a training row except the label.
    limit = len(training_set[0]) - 1

    # generate response classes from training data
    classes = get_classes(training_set)

    try:
        for test_instance in test_set:
            # zip() stops at the shorter input without complaint, so a sample
            # with the wrong number of values would be compared on a subset
            # of the dimensions and yield a meaningless prediction.
            if len(test_instance) != limit:
                raise ValueError(
                    'Sample ' + str(test_instance) + ' has '
                    + str(len(test_instance)) + ' features, expected '
                    + str(limit))

            # Each entry is the training row with its distance appended.
            distances = []
            for row in training_set:
                squared = sum((x - y) * (x - y)
                              for x, y in zip(row[:limit], test_instance))
                distances.append(row + [sqrt(squared)])

            # The distance is always the last element of an entry.
            distances.sort(key=itemgetter(-1))

            # find k nearest neighbors
            neighbors = find_neighbors(distances, k)

            # get the class with maximum votes
            index, value = find_response(neighbors, classes)

            # Display prediction
            print('The predicted class for sample ' + str(test_instance) + ' is : ' + classes[index])
            print('Number of votes : ' + str(value) + ' out of ' + str(k))

    except Exception as e:
        print(e)


In [11]:
def main():
    """Run the interactive k-nearest-neighbours classifier.

    Prompts for ``k`` and for the names of the training and test data files,
    loads and converts both files, validates the inputs and prints a
    prediction for every test sample.  Invalid input (non-integer or
    non-positive ``k``, missing file, empty data set, ``k`` larger than the
    training set) is reported with a printed message instead of an exception.
    """
    try:
        # get value of k
        k = int(input('Enter the value of k : '))

        # With k = 0 no neighbour votes at all, and a negative k would slice
        # the neighbour list from the wrong end; reject both up front.
        if k < 1:
            print('Value of k must be a positive integer')
            return

        # load the training and test data set
        training_file = input('Enter name of training data file : ')
        test_file = input('Enter name of test data file : ')
        training_set = convert_to_float(load_data_set(training_file), 'training')
        test_set = convert_to_float(load_data_set(test_file), 'test')

        if not training_set:
            print('Empty training set')

        elif not test_set:
            print('Empty test set')

        elif k > len(training_set):
            print('Expected number of neighbors is higher than number of training data instances')

        else:
            knn(training_set, test_set, k)

    except ValueError as v:
        # Raised by int() when the entered k is not an integer.
        print(v)

    except FileNotFoundError:
        print('File not found')


# Entry point: start the interactive session only when this code is executed
# directly (as a script or in a notebook cell), not when it is imported.
if __name__ == '__main__':
    main()

Enter the value of k : 3
Enter name of training data file : iris-dataset.csv
Enter name of test data file : iris-test.csv
The predicted class for sample [4.3, 2.9, 1.7, 0.3] is : Iris-setosa
Number of votes : 3 out of 3
The predicted class for sample [4.6, 2.7, 1.5, 0.2] is : Iris-setosa
Number of votes : 3 out of 3
The predicted class for sample [5.3, 3.4, 1.6, 0.2] is : Iris-setosa
Number of votes : 3 out of 3
The predicted class for sample [5.2, 4.1, 1.5, 0.1] is : Iris-setosa
Number of votes : 3 out of 3
The predicted class for sample [6.0, 2.2, 4.2, 1.0] is : Iris-versicolor
Number of votes : 3 out of 3
The predicted class for sample [6.2, 2.3, 4.5, 1.5] is : Iris-versicolor
Number of votes : 3 out of 3
The predicted class for sample [5.0, 2.1, 3.6, 1.2] is : Iris-versicolor
Number of votes : 3 out of 3
The predicted class for sample [6.6, 2.8, 5.4, 2.0] is : Iris-virginica
Number of votes : 3 out of 3
The predicted class for sample [6.4, 3.2, 5.3, 2.3] is : Iris-virginica
Number 