In [2]:
from collections import Counter
import math
import pandas as pd

def knn(data, query, k, distance_fn, choice_fn, attrNumber):
    """Predict a value for ``query`` from its ``k`` nearest rows in ``data``.

    Args:
        data: Indexable sequence of rows. Each row is handed to
            ``distance_fn`` and its last element is used as the row's label.
        query: The example to predict for; passed to ``distance_fn``.
        k: Number of nearest neighbours to consult. Must be at least 1.
        distance_fn: Callable ``(example, query, attrNumber) -> distance``.
        choice_fn: Callable that reduces the list of neighbour labels to one
            prediction (e.g. ``mode`` for classification, a mean for
            regression).
        attrNumber: Forwarded unchanged to ``distance_fn``.

    Returns:
        A tuple ``(neighbors, prediction)``. ``neighbors`` is a list of
        ``(distance, index)`` pairs in ascending order holding at most ``k``
        entries; ``prediction`` is ``choice_fn`` applied to the labels of
        those rows.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # A negative k would make the slice below drop entries from the *end* of
    # the sorted list (returning almost every row), and k == 0 would hand an
    # empty label list to choice_fn. Reject both up front.
    if k < 1:
        raise ValueError("k must be at least 1, got {!r}".format(k))

    # Pair every row's distance to the query with the row's position in data.
    neighbor_distances_and_indices = [
        (distance_fn(example, query, attrNumber), index)
        for index, example in enumerate(data)
    ]

    # Tuples sort by distance first; equal distances fall back to the lower
    # row index, so the ordering is deterministic.
    k_nearest_distances_and_indices = sorted(neighbor_distances_and_indices)[:k]

    # The label is the last element of each selected row.
    k_nearest_labels = [data[i][-1] for _distance, i in k_nearest_distances_and_indices]

    return k_nearest_distances_and_indices, choice_fn(k_nearest_labels)

def mode(labels):
    """Return the most frequent label; ties go to the label seen first."""
    tally = Counter(labels)
    # most_common(1) yields a one-element list of (label, count); indexing it
    # raises IndexError when labels is empty.
    winner, _occurrences = tally.most_common(1)[0]
    return winner

def euclidean_distance(sample, target, attrNumber):
    """Euclidean distance between the first ``attrNumber`` entries of two rows.

    Only positions ``0 .. attrNumber - 1`` of ``sample`` and ``target`` are
    compared; anything after them (such as a trailing label) is ignored.
    """
    total = 0
    for position in range(attrNumber):
        gap = sample[position] - target[position]
        total = total + math.pow(gap, 2)
    return math.sqrt(total)

def main():
    """Classify one hard-coded sample with 7-NN over three selected columns.

    Reads ``preprocessed_data.csv``, keeps three feature columns plus the
    ``class`` column, and prints the predicted class for the sample.
    """
    feature_columns = [
        "sit_and_bend_forward_cm",
        "sit_ups_counts",
        "broad_jump_cm",
    ]

    frame = pd.read_csv("preprocessed_data.csv")
    # Rows become plain lists with the label in the last position, which is
    # where knn looks for it.
    rows = frame[feature_columns + ['class']].values.tolist()

    sample = [9, 10, 10]
    print("Predict on the value ", sample)

    # Euclidean function
    print("Using Euclidean function")
    _neighbors, predicted_label = knn(
        rows,
        sample,
        k=7,
        distance_fn=euclidean_distance,
        choice_fn=mode,
        attrNumber=3,
    )
    print("\t\toutput = ", predicted_label)

# Entry point: call main() only when this code runs as the top-level module.
if __name__ == '__main__':
    main()



Predict on the value  [9, 10, 10]
Using Euclidean function
		output =  D


In [24]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the dataset and keep the feature columns plus the trailing label column.
data = pd.read_csv("preprocessed_data.csv")
selected_column = ["sit_and_bend_forward_cm", "sit_ups_counts", "broad_jump_cm", "gripforce", "age", "body_fat_%", "class"]

data1 = data[selected_column]
# Every selected column except the final "class" label is a feature.
attrNumber = len(selected_column) - 1

# Fix the seed so the split, and therefore the predictions below, are the
# same on every Restart & Run All.
RANDOM_SEED = 42
train_data, test_data = train_test_split(data1, test_size=0.2, random_state=RANDOM_SEED)

predict = []
# knn expects plain row lists with the label last. test_data stays a
# DataFrame so its 'class' column can still be read afterwards.
train_data = train_data.values.tolist()
for test_row in test_data.values.tolist():
    # Slice by attrNumber instead of hard-coding six positional lookups: this
    # stays in sync with selected_column and avoids integer-position indexing
    # on a label-indexed Series, which pandas has deprecated.
    test = test_row[:attrNumber]
    reg_k_nearest_neighbors, reg_prediction = knn(
        train_data, test, k=1, distance_fn=euclidean_distance, choice_fn=mode, attrNumber=attrNumber
    )
    predict.append(reg_prediction)

In [23]:
# Show the predicted labels, then the true labels of the held-out rows.
print(predict)
real = test_data['class'].values.tolist()
print(real)

# Number of positions where the prediction equals the true label.
count = sum(1 for position in range(len(real)) if predict[position] == real[position])

# Correct predictions, followed by the size of the test set.
print(str(count))
print(len(real))

['A', 'B', 'A', 'C', 'A', 'D', 'D', 'C', 'B', 'C', 'B', 'C', 'C', 'D', 'B', 'A', 'C', 'B', 'C', 'C', 'B', 'C', 'B', 'B', 'A', 'A', 'D', 'A', 'B', 'D', 'D', 'C', 'A', 'D', 'C', 'C', 'A', 'B', 'C', 'A', 'B', 'A', 'B', 'C', 'B', 'D', 'B', 'B', 'A', 'A', 'B', 'D', 'D', 'C', 'C', 'A', 'B', 'A', 'A', 'D', 'A', 'C', 'A', 'A', 'C', 'C', 'B', 'A', 'B', 'A', 'A', 'B', 'A', 'A', 'A', 'A', 'C', 'A', 'C', 'B', 'A', 'D', 'B', 'A', 'A', 'D', 'A', 'A', 'B', 'A', 'A', 'A', 'C', 'B', 'C', 'C', 'C', 'A', 'C', 'D', 'D', 'C', 'A', 'B', 'D', 'B', 'B', 'A', 'A', 'C', 'D', 'B', 'C', 'A', 'A', 'A', 'B', 'D', 'A', 'A', 'B', 'B', 'A', 'B', 'D', 'C', 'C', 'C', 'A', 'A', 'C', 'B', 'D', 'A', 'B', 'C', 'A', 'A', 'C', 'D', 'C', 'C', 'C', 'D', 'D', 'D', 'B', 'A', 'B', 'B', 'B', 'A', 'B', 'A', 'D', 'C', 'C', 'A', 'B', 'C', 'C', 'C', 'C', 'A', 'C', 'D', 'C', 'D', 'A', 'A', 'B', 'C', 'B', 'A', 'D', 'B', 'A', 'D', 'A', 'C', 'D', 'C', 'B', 'D', 'B', 'B', 'A', 'C', 'A', 'B', 'C', 'C', 'B', 'B', 'A', 'A', 'A', 'C', 'B', 'A',