## Problem:
Implement the k-nearest-neighbor (kNN) algorithm and measure the accuracy, precision, and recall of classifying iris type 2 versus types 1 and 3 (one-vs-rest).

Submit a jupyter notebook.

## Solution:

In [1]:
# Necessary imports
import numpy as np
import pandas as pd
import heapq as hp

# Load the Iris dataset from a space-separated text file; the path is relative
# to the notebook's working directory. Later cells read the columns
# sl, sw, pl, pw (features) and c (class label) from this frame.
iris = pd.read_csv("data/iris.txt", sep=" ")

In [2]:
# Pull each feature out of the frame as its own (N, 1) column array
# (np.array copies, so these do not alias the DataFrame's storage).
sepal_length, sepal_width, petal_length, petal_width = (
    np.array(feature).reshape(-1, 1)
    for feature in (iris.sl, iris.sw, iris.pl, iris.pw)
)

# Feature matrix A: one row per sample, columns ordered sl, sw, pl, pw
A = np.concatenate(
    [sepal_length, sepal_width, petal_length, petal_width],
    axis=1,
)

# Class labels c as an (N, 1) column matrix, row-aligned with A
c = np.matrix(iris.c).transpose()

In [3]:
# Define an object and overload custom comparison operators
class tup:
    '''Heap node that pairs a value with the index it came from.

    Only ``<`` is reversed (it answers "is my value larger?"), which makes
    a heapq min-heap of these nodes keep the largest ``val`` at its root.
    Every other comparison (``<=``, ``>``, ``>=``, ``==``, ``!=``) follows
    the natural ordering of ``val``; ``idx`` never takes part in comparisons.
    '''

    def __init__(self, val, idx):
        self.val, self.idx = val, idx

    def __str__(self):
        '''Text form "val,idx": val with format spec .3, idx as an integer.'''
        return '{:.3},{:d}'.format(self.val, self.idx)

    # --- equality (natural, on val only) ---

    def __eq__(self, other):
        return self.val == other.val

    def __ne__(self, other):
        return self.val != other.val

    # --- ordering ---

    def __lt__(self, other):
        '''Reversed on purpose: the node with the LARGER val sorts first.'''
        return self.val > other.val

    def __gt__(self, other):
        '''Natural order (not reversed, unlike ``<``).'''
        return self.val > other.val

    def __le__(self, other):
        '''Natural order: true when this val does not exceed the other.'''
        return self.val <= other.val

    def __ge__(self, other):
        '''Natural order: true when this val is at least the other.'''
        return self.val >= other.val

In [4]:
# Define K Nearest Neighbor function
def kNearestNeighbor(K, A, test, classes):
    '''Classify one sample by majority vote among its K nearest neighbours.

    The distance between `test` and each row of `A` is the squared
    Euclidean distance. A heap of `tup` nodes, bounded to K entries, keeps
    the K smallest distances seen so far with the largest of them at the root.

    Parameters
    ----------
    K : int
        Number of neighbours that vote; must be at least 1.
    A : array with shape (N, D)
        Reference samples, one per row. Any number of features D is accepted.
    test : array holding D values
        The sample to classify.
    classes : indexable with N entries
        ``classes[i]`` is the label of ``A[i]``; each entry must hold a
        single value convertible to int (a scalar or a 1x1 array/matrix).

    Returns
    -------
    int
        The most frequent label among the retained neighbours.

    Raises
    ------
    ValueError
        If K is smaller than 1 or A has no rows.
    '''
    if K < 1:
        raise ValueError('K must be a positive integer')

    N = A.shape[0]
    if N == 0:
        raise ValueError('A must contain at least one sample')

    # Fill the heap with K sentinel nodes (infinite distance, index -1) so
    # heap[0] always exists and the heap never grows beyond K entries.
    heap = []
    for _ in range(K):
        hp.heappush(heap, tup(np.inf, -1))

    for i in range(N):
        # Column vector of feature differences; -1 lets numpy infer the
        # number of features instead of assuming exactly four.
        e = (A[i, :] - test).reshape((-1, 1))
        # .item() extracts the single element of the 1x1 product without
        # relying on the deprecated implicit array-to-scalar conversion.
        tp = tup(float(np.dot(e.T, e).item()), i)
        # `<=` on tup compares raw distances, and heap[0] is the farthest
        # candidate kept so far, so a closer-or-equal sample replaces it.
        if tp <= heap[0]:
            hp.heapreplace(heap, tp)

    # Collect the labels of the retained neighbours
    categories = []
    while heap:
        h = hp.heappop(heap)
        if h.idx < 0:
            # Sentinel that was never replaced (happens when K > N); it has
            # no sample behind it and must not vote.
            continue
        categories.append(int(np.asarray(classes[h.idx]).item()))

    # Find the label with the maximum occurrence
    return max(set(categories), key=categories.count)

In [5]:
# Define method to find precision, recall and accuracy for the given category
def predictCondition(k, A, category, classes=None):
    '''Score kNN as a one-vs-rest classifier for a single category.

    Every row of `A` is classified with kNearestNeighbor against the whole
    of `A`. Both the prediction and the true label are then reduced to
    "is `category`" / "is not `category`" before being tallied, so a sample
    of another class that is predicted as yet another class still counts as
    a true negative.

    NOTE: the row being classified is also part of the reference set, so it
    competes as a neighbour of itself at distance 0. The figures are
    therefore in-sample scores, not a held-out estimate.

    Parameters
    ----------
    k : int
        Number of neighbours passed to kNearestNeighbor.
    A : array with shape (N, D)
        Samples, one per row.
    category : int
        The label treated as the positive class.
    classes : indexable with N entries, optional
        True label of each row of A. Defaults to the notebook-level `c`.

    Returns
    -------
    tuple
        (true_positive, true_negative, false_positive, false_negative,
        precision, recall, accuracy). The last three are percentages; a
        ratio whose denominator is zero is reported as 0.0.
    '''
    if classes is None:
        classes = c

    N = A.shape[0]
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0

    for i in range(N):
        predicted_positive = kNearestNeighbor(k, A, A[i, :], classes) == category
        # .item() unwraps a 1x1 matrix/array entry without the deprecated
        # implicit array-to-scalar conversion.
        actual_positive = int(np.asarray(classes[i]).item()) == category

        if predicted_positive and actual_positive:
            true_positive += 1
        elif predicted_positive:
            false_positive += 1
        elif actual_positive:
            false_negative += 1
        else:
            true_negative += 1

    predicted_total = true_positive + false_positive
    actual_total = true_positive + false_negative
    sample_total = true_positive + true_negative + false_positive + false_negative

    # Guard each ratio: nothing predicted positive, nothing actually
    # positive, or an empty data set would otherwise divide by zero.
    precision = true_positive / predicted_total * 100 if predicted_total else 0.0
    recall = true_positive / actual_total * 100 if actual_total else 0.0
    accuracy = (true_positive + true_negative) / sample_total * 100 if sample_total else 0.0

    return (true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy)

In [6]:
# Number of neighbours that vote in each kNN classification; the result
# cells below pass it as the first argument to predictCondition.
k = 7

In [7]:
# Calculate and print results for category 1.
# predictCondition returns the four confusion counts followed by precision,
# recall and accuracy (already scaled to percentages); only the three
# percentages are printed here.
category = 1
true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy = predictCondition(k, A, category)
print("== Results For Category",category, "==")
print("Precision: %", precision)
print("Recall: %", recall)
print("Accuracy: %", accuracy)

== Results For Category 1 ==
Precision: % 100.0
Recall: % 100.0
Accuracy: % 100.0


In [8]:
# Calculate and print results for category 2 (the class the problem
# statement asks about).
# predictCondition returns the four confusion counts followed by precision,
# recall and accuracy (already scaled to percentages); only the three
# percentages are printed here.
category = 2
true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy = predictCondition(k, A, category)
print("== Results For Category",category, "==")
print("Precision: %", precision)
print("Recall: %", recall)
print("Accuracy: %", accuracy)

== Results For Category 2 ==
Precision: % 97.91666666666666
Recall: % 94.0
Accuracy: % 97.33333333333334


In [9]:
# Calculate and print results for category 3.
# predictCondition returns the four confusion counts followed by precision,
# recall and accuracy (already scaled to percentages); only the three
# percentages are printed here.
category = 3
true_positive, true_negative, false_positive, false_negative, precision, recall, accuracy = predictCondition(k, A, category)
print("== Results For Category",category, "==")
print("Precision: %", precision)
print("Recall: %", recall)
print("Accuracy: %", accuracy)

== Results For Category 3 ==
Precision: % 94.23076923076923
Recall: % 98.0
Accuracy: % 97.33333333333334
