In [3]:
import pandas as pd
import numpy as np

# Load the sonar training set from the working directory. header='infer'
# lets pandas take the column names from the first line of the file.
data = pd.read_csv('sonar_train.csv',header='infer')


'''-------------------------
    SUPPORTIVE FUNCTIONS
-------------------------'''

# Function to return (float) euclidean distance 
def euclideanDistance(vector_1, vector_2, attr_length):
    """Return the Euclidean distance over the first ``attr_length`` positions.

    Args:
        vector_1: Indexable collection of numeric attributes.
        vector_2: Indexable collection of numeric attributes.
        attr_length: Number of positions (0 .. attr_length - 1) that are
            compared between the two vectors.

    Returns:
        The square root (via ``np.sqrt``) of the summed squared differences.
    """
    squared_sum = 0.0

    # Accumulate one squared difference per compared position.
    for idx in range(attr_length):
        delta = vector_1[idx] - vector_2[idx]
        squared_sum = squared_sum + delta ** 2

    # The distance is the square root of the accumulated sum.
    return np.sqrt(squared_sum)


# Function to return (float) manhattan distance
def manhattanDistance(vector_1, vector_2, attr_length):
    """Return the Manhattan distance over the first ``attr_length`` positions.

    Args:
        vector_1: Indexable collection of numeric attributes.
        vector_2: Indexable collection of numeric attributes.
        attr_length: Number of positions (0 .. attr_length - 1) that are
            compared between the two vectors.

    Returns:
        The sum of the absolute per-position differences.
    """
    total = 0.0

    # Accumulate one absolute difference per compared position.
    for idx in range(attr_length):
        gap = vector_1[idx] - vector_2[idx]
        total = total + np.absolute(gap)

    return total


# Function to return (vector) of K neighbours 
def getNeighbours(traingSet, testingVector, k, algorithm, attr_length):
    """Return the ``k`` training vectors closest to ``testingVector``.

    Args:
        traingSet: Indexable collection of training vectors; ``len()`` and
            integer indexing are used on it.
        testingVector: Vector whose neighbours are looked up.
        k: Number of neighbours wanted. A value larger than the training
            set yields every training vector; a value <= 0 yields an
            empty list.
        algorithm: ``'euclidean'`` selects ``euclideanDistance``; any other
            value selects ``manhattanDistance``.
        attr_length: Number of attributes passed on to the distance
            function.

    Returns:
        A list of ``[vector, distance, index]`` entries ordered by
        ascending distance, where ``index`` is the vector's position in
        ``traingSet``.
    """
    scored = []

    # Score every training vector against the test vector.
    for index in range(len(traingSet)):
        candidate = traingSet[index]

        # What algorithm to use (euclidean or manhattan)
        if algorithm == 'euclidean':
            dist = euclideanDistance(testingVector, candidate, attr_length)
        else:
            dist = manhattanDistance(testingVector, candidate, attr_length)

        # Keep the vector, its distance and its index in the training set.
        scored.append([candidate, dist, index])

    # Sort the plain Python list on the distance only. Each entry mixes a
    # vector with two scalars, so packing the entries into np.array() fails
    # with an inhomogeneous-shape ValueError on NumPy >= 1.24.
    scored.sort(key=lambda entry: entry[1])

    # Slicing clamps k to the number of available vectors instead of
    # raising IndexError; max() keeps a negative k from slicing off the tail.
    return scored[:max(k, 0)]


# Function to return (integer) the class predicted 
def getPredicted(neighbours, y):
    """Return the most frequent class among the given neighbours.

    Args:
        neighbours: Sequence of entries whose element at position 2 is used
            to index ``y``.
        y: Class labels, looked up with each neighbour's stored index.

    Returns:
        The label with the highest count; when counts tie, the label that
        was counted first wins. Returns ``0`` when ``neighbours`` is empty.
    """
    tally = {}

    # Count how many neighbours carry each label ({label: counter}).
    for entry in neighbours:
        label = y[entry[2]]
        tally[label] = tally.get(label, 0) + 1

    # max() returns the first maximal item, which keeps the earliest-counted
    # label on ties; (0, 0) is the fallback when nothing was counted.
    winner = max(tally.items(), key=lambda pair: pair[1], default=(0, 0))

    return winner[0]



'''------------------
    MAIN FUNCTION
------------------'''

# Function to return (vector) of all predictions for testing data
def mykNN(X, y, X_, options):
    """Predict a class for every vector of the testing data.

    Args:
        X: Training vectors, handed to ``getNeighbours``.
        y: Class labels, handed to ``getPredicted``.
        X_: Testing vectors; each one is converted with ``np.array`` before
            its neighbours are looked up.
        options: Sequence ``[k, algorithm]`` or
            ``[k, algorithm, total_attributes]``.

    Returns:
        A NumPy array with one prediction per testing vector, or an empty
        array when ``options`` holds fewer than two entries.
    """
    # k and the algorithm are mandatory; without them nothing is predicted.
    if len(options) <= 1:
        return np.array([])

    def predict(sample_index):
        # Convert to NP array
        sample = np.array(X_[sample_index])

        # Use the third option as the attribute count when it is present.
        # NOTE(review): the fallback of 2 compares only the first two
        # attributes - confirm this default is intended.
        attr_count = options[2] if len(options) >= 3 else 2

        # Find the neighbours, then the prediction for this test vector.
        nearest = getNeighbours(X, sample, options[0], options[1], attr_count)
        return getPredicted(nearest, y)

    return np.array([predict(i) for i in range(len(X_))])



# '''-------------
#     TEST KNN
# -------------'''

# foldTrain, foldTest = X[:120], X[120:] 

# X_ = [[0.1, 0.2, 8, 7], [1, 4, 8, 7]]
# options = [3, 'euclidean', 4]

# X: Whole training set
# y: All classes for training set
# X_: Testing set
# y_: Prediction for testing set
# options: Array [k, algorithm, (attributes to count)]
# y_ = mykNN(foldTrain, y, foldTest, options)

# print(y[120:])
# Print the loaded DataFrame; the table that follows is this call's output.
print(data)

         A1      A2      A3      A4      A5      A6      A7      A8      A9  \
0    0.0079  0.0086  0.0055  0.0250  0.0344  0.0546  0.0528  0.0958  0.1009   
1    0.0599  0.0474  0.0498  0.0387  0.1026  0.0773  0.0853  0.0447  0.1094   
2    0.0093  0.0269  0.0217  0.0339  0.0305  0.1172  0.1450  0.0638  0.0740   
3    0.0151  0.0320  0.0599  0.1050  0.1163  0.1734  0.1679  0.1119  0.0889   
4    0.0317  0.0956  0.1321  0.1408  0.1674  0.1710  0.0731  0.1401  0.2083   
..      ...     ...     ...     ...     ...     ...     ...     ...     ...   
134  0.0731  0.1249  0.1665  0.1496  0.1443  0.2770  0.2555  0.1712  0.0466   
135  0.0516  0.0944  0.0622  0.0415  0.0995  0.2431  0.1777  0.2018  0.2611   
136  0.0015  0.0186  0.0289  0.0195  0.0515  0.0817  0.1005  0.0124  0.1168   
137  0.0411  0.0277  0.0604  0.0525  0.0489  0.0385  0.0611  0.1117  0.1237   
138  0.0270  0.0163  0.0341  0.0247  0.0822  0.1256  0.1323  0.1584  0.2017   

        A10  ...     A52     A53     A54     A55   