In [5]:

import numpy as np
from sklearn.model_selection import train_test_split
import csv
import random
import math
import operator
 
def loadDataset(filename, test_size=0.2):
	"""Load a semicolon-delimited numeric file and split it into train/test sets.

	The first line of the file is skipped as a header. The total number of
	loaded rows is printed before splitting.

	Args:
		filename: Path of the file to read.
		test_size: Value forwarded to ``train_test_split`` as its
			``test_size`` argument. Defaults to 0.2.

	Returns:
		A ``(Train_set, Test_set)`` tuple as produced by ``train_test_split``.
	"""
	# Use a context manager so the file handle is closed even if parsing fails.
	with open(filename, "rb") as source:
		Data = np.loadtxt(source, delimiter=";", skiprows=1)
	print('Length of Total Data:', len(Data))
	Train_set, Test_set = train_test_split(Data, test_size=test_size)
	return (Train_set, Test_set)
 
def euclideanDistance(first, second, length):
	"""Return the Euclidean distance between two sequences.

	Only the first ``length`` positions of ``first`` and ``second`` are
	compared; anything after that is ignored.
	"""
	squaredGaps = (pow(first[idx] - second[idx], 2) for idx in range(length))
	return math.sqrt(sum(squaredGaps))
 
def getNeighbors(trainingSet, testInstance, k):
	"""Return the ``k`` rows of ``trainingSet`` nearest to ``testInstance``.

	Distance is measured with ``euclideanDistance`` over every position of
	``testInstance`` except the last one. Rows are returned nearest first.
	Raises ``IndexError`` when ``k`` exceeds the number of training rows.
	"""
	# The final position is excluded from the distance computation.
	featureCount = len(testInstance) - 1
	scored = [
		(row, euclideanDistance(testInstance, row, featureCount))
		for row in trainingSet
	]
	# Sort on the distance only, so the rows themselves are never compared.
	ranked = sorted(scored, key=operator.itemgetter(1))
	return [ranked[pos][0] for pos in range(k)]
 
def getResponse(neighbors):
	"""Return the most frequent last-element value among ``neighbors``.

	Each neighbor casts one vote for the value in its final position. When
	several values tie, the one that was seen first wins. An empty
	``neighbors`` sequence raises ``IndexError``.
	"""
	tally = {}
	for row in neighbors:
		label = row[-1]
		tally[label] = tally.get(label, 0) + 1
	# A stable descending sort keeps first-seen order among equal vote counts.
	ranked = sorted(tally.items(), key=lambda entry: entry[1], reverse=True)
	winner, _ = ranked[0]
	return winner
 
def getAccuracy(testSet, predictions):
	"""Return the percentage of rows whose last element matches its prediction.

	Args:
		testSet: Sequence of rows; the final element of each row is compared
			against the prediction at the same index.
		predictions: Sequence of predicted values, indexed like ``testSet``.

	Returns:
		A float between 0 and 100. An empty ``testSet`` yields 0.0 instead of
		raising ``ZeroDivisionError``.

	Raises:
		IndexError: If ``predictions`` is shorter than ``testSet``.
	"""
	total = len(testSet)
	# Compare the length explicitly: a NumPy array has no unambiguous truth
	# value, so ``if not testSet`` would fail for array inputs.
	if total == 0:
		return 0.0
	correct = sum(
		1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
	)
	return (correct / float(total)) * 100
	
def main(filename='Lab4Data.csv', k=3):
	"""Run the k-nearest-neighbours experiment and print the results.

	Loads and splits the data with ``loadDataset``, predicts a value for every
	test row from its ``k`` nearest training rows, prints each prediction next
	to the row's last element, and finally prints the overall accuracy.

	Args:
		filename: Path of the data file passed to ``loadDataset``.
		k: Number of neighbours that vote on each prediction.
	"""
	trainRows, testRows = loadDataset(filename)
	print ('Train set: ' + repr(len(trainRows)))
	print ('Test set: ' + repr(len(testRows)))
	print ('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
	# generate predictions
	predictions = []
	for row in testRows:
		neighbors = getNeighbors(trainRows, row, k)
		result = getResponse(neighbors)
		predictions.append(result)
		print('# predicted=' + repr(result) + ', actual=' + repr(row[-1]))
	accuracy = getAccuracy(testRows, predictions)
	print('Accuracy: ' + repr(accuracy) + '%')


# Guard the entry point so importing this code does not start the experiment.
if __name__ == '__main__':
	main()


Length of Total Data: 2000
Train set: 1600
Test set: 400
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=1.0, actual=1.0
# predicted=3.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=2.0, actual=1.0
# predicted=2.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=3.0, actual=1.0
# predicted=2.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=3.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=2.0, actual=2.0
# predicted=1.0, actual=2.0
# predicted=3.0, actual=3.0
# predi

# predicted=1.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=1.0, actual=1.0
# predicted=1.0, actual=1.0
# predicted=2.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=1.0, actual=1.0
# predicted=1.0, actual=2.0
# predicted=2.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=1.0, actual=1.0
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=3.0
# predicted=3.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=1.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=2.0
# predicted=3.0, actual=3.0
# predicted=2.0, actual=1.0
# predicted=1.0, actual=1.0
# predicted=1.0, actual=2.0
# predicted=2.0, actual=2.0
# predicted=3.0, act