In [1]:
#Import required python modules
import numpy as np 
import matplotlib.pyplot as plt 

In [2]:
# Read the CSV into a NumPy array, skipping the single header line.
data = np.genfromtxt('iris.csv', delimiter=',', skip_header=1)

# Columns used as model inputs (features) and as the model output (class label).
# NOTE(review): what columns 0, 1 and 5 hold depends on the layout of iris.csv — confirm.
input_columns = [0, 1]
output_column = [5]

# Positional hold-out split: the first 80 rows train the model, the last 20 rows test it.
# output_column is a list, so Y_train / Y_test keep a second axis of length 1.
X_train, Y_train = data[:80, input_columns], data[:80, output_column]
X_test, Y_test = data[-20:, input_columns], data[-20:, output_column]


In [3]:
#Distance calculation between two data points
def euclidean_distance(instance1, instance2):
    """Return the straight-line (L2) distance between two points.

    Both arguments are indexable sequences of numbers. Only the first
    ``len(instance1)`` coordinates of ``instance2`` are read, so
    ``instance2`` must be at least as long as ``instance1``.
    """
    squared_gaps = (
        (instance1[axis] - instance2[axis]) ** 2
        for axis in range(len(instance1))
    )
    # Start the sum at 0.0 so the accumulator is a float even for empty points.
    return np.sqrt(sum(squared_gaps, 0.0))

#Make prediction of the test points using training points
def prediction(X_train, Y_train, X_test, n_neighbors=3):
    """Classify each test point by majority vote among its nearest training points.

    Parameters
    ----------
    X_train : sequence of training points (each an indexable sequence of numbers).
    Y_train : class labels for X_train, given either as a flat sequence or as a
        single column (shape (N, 1)). Every label is converted with ``int()``.
    X_test : sequence of points to classify.
    n_neighbors : number of nearest training points that vote. If it exceeds
        the number of training points, every training point votes.

    Returns
    -------
    (allPredictedOutputs, allTestNeighbors)
        ``allPredictedOutputs[i]`` is the winning integer label for
        ``X_test[i]``; ``allTestNeighbors[i]`` is the list of training points
        that voted for it, nearest first.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is smaller than 1 or there are no training points,
        because no vote can be held in either case.
    """
    if n_neighbors < 1:
        raise ValueError("n_neighbors must be at least 1")

    # Flatten the labels so that a 1-D sequence and an (N, 1) column both
    # iterate as scalars; int() then never receives a one-element array.
    trainLabels = [int(label) for label in np.asarray(Y_train).ravel()]
    trainingSet = list(zip(X_train, trainLabels))
    if not trainingSet:
        raise ValueError("at least one training point is required")

    # Never ask for more voters than there are training points.
    neighborCount = min(n_neighbors, len(trainingSet))

    allTestNeighbors = []
    allPredictedOutputs = []

    # Calculate for each test data point
    for testInput in X_test:
        # Sort (ascending) the training points by distance from the test point.
        # sorted() is stable, so equally distant points keep training order.
        rankedByDistance = sorted(
            (
                (euclidean_distance(testInput, trainInput), trainInput, label)
                for trainInput, label in trainingSet
            ),
            key=lambda entry: entry[0],
        )
        nearest = rankedByDistance[:neighborCount]

        # Count votes per label value. Keying on the label itself (rather than
        # using it as an array index) supports labels that are negative or do
        # not form the range 0..k-1.
        voteCount = {}
        for _, _, label in nearest:
            voteCount[label] = voteCount.get(label, 0) + 1

        # Majority vote with equal weights; on a tie the smallest label wins
        # (max() keeps the first maximum of the ascending label list).
        predictedOutput = max(sorted(voteCount), key=voteCount.get)

        allTestNeighbors.append([trainInput for _, trainInput, _ in nearest])
        allPredictedOutputs.append(predictedOutput)

    return allPredictedOutputs, allTestNeighbors
        

def performanceEvaluation(X_train, Y_train, X_test, Y_test, n_neighbors=3):
    """Print how many test points the nearest-neighbour classifier gets right.

    Predicts a label for every point in ``X_test`` with ``prediction`` and
    compares it with the matching entry of ``Y_test``. The correct count,
    wrong count and accuracy (in percent) are printed; nothing is returned.

    ``Y_test`` may be a flat sequence or a single column (shape (N, 1)).
    With no test points the accuracy is reported as 0.0 instead of dividing
    by zero.
    """
    # Flatten so that every actual label is a scalar, whatever layout was given.
    actualOutputs = np.asarray(Y_test).ravel()

    # One call covers the whole test set; each point is classified independently.
    predictedOutputs, _ = prediction(X_train, Y_train, X_test, n_neighbors)

    totalCount = 0
    correctCount = 0
    for predictedOutput, testActualOutput in zip(predictedOutputs, actualOutputs):
        # Compare label with label (not a one-element list with a label).
        if predictedOutput == testActualOutput:
            correctCount += 1
        totalCount += 1

    accuracy = (correctCount*100)/(totalCount) if totalCount else 0.0
    print("Total Correct Count: ",correctCount," Total Wrong Count: ",totalCount-correctCount," Accuracy: ",accuracy)

        
        
# Score the classifier on the held-out rows using the 3 nearest neighbours.
performanceEvaluation(X_train, Y_train, X_test, Y_test, n_neighbors=3)

Total Correct Count:  19  Total Wrong Count:  1  Accuracy:  95.0


In [4]:
# Classify one hand-written sample; it has two values, one per entry of input_columns.
example_input = [[5.2, 2.9]]
# Left as the last expression so the cell displays (predictions, neighbours).
prediction(X_train, Y_train, example_input, 3)

([0], [[array([5.2, 2.7]), array([5., 3.]), array([5.4, 3. ])]])