In [25]:
from collections import Counter
import math

def mode(labels):
    return Counter(labels).most_common(1)[0][0]

def euclidean_distance(point1, point2):
    sum_squared_distance = 0
    for i in range(len(point1)):
        sum_squared_distance += math.pow(point1[0][i] - point2[0][i], 2)
    return math.sqrt(sum_squared_distance)

In [26]:
def knn(data, query, k):
    neighbor_distances_and_indices = []
    
    # 3. For each example in the data
    for index, example in enumerate(data):
        # 3.1 Calculate the distance between the query example and the current
        # example from the data.
        distance = euclidean_distance(example[:-1], query)
        
        # 3.2 Add the distance and the index of the example to an ordered collection
        neighbor_distances_and_indices.append((distance, index))
    
    # 4. Sort the ordered collection of distances and indices from
    # smallest to largest (in ascending order) by the distances
    sorted_neighbor_distances_and_indices = sorted(neighbor_distances_and_indices)
    
    # 5. Pick the first K entries from the sorted collection
    k_nearest_distances_and_indices = sorted_neighbor_distances_and_indices[:k]
    
    # 6. Get the labels of the selected K entries
    k_nearest_labels = [data[i][-1] for distance, i in k_nearest_distances_and_indices]

    # 7. If regression (choice_fn = mean), return the average of the K labels
    # 8. If classification (choice_fn = mode), return the mode of the K labels
    return k_nearest_distances_and_indices , mode(k_nearest_labels)

In [41]:
def knn_weighted_sum(data, query, k):
    neighbor_distances_and_indices = []
    
    # 3. For each example in the data
    for index, example in enumerate(data):
        # 3.1 Calculate the distance between the query example and the current
        # example from the data.
        distance = euclidean_distance(example[:-1], query)
        
        # 3.2 Add the distance and the index of the example to an ordered collection
        neighbor_distances_and_indices.append((distance, example[-1]))
    
    # 4. Sort the ordered collection of distances and indices from
    # smallest to largest (in ascending order) by the distances
    sorted_neighbor_distances_and_indices = sorted(neighbor_distances_and_indices)
    
    # 5. Pick the first K entries from the sorted collection
    k_nearest_distances_and_indices = sorted_neighbor_distances_and_indices[:k]
    print(k_nearest_distances_and_indices)
    
     # compute weighted Sum of each target
    target2weight = {}
    c = 0.0001
    for distance,target in  k_nearest_distances_and_indices:
      weight = 1/(distance + c)
      if target in target2weight.keys():
        target2weight[target] = target2weight[target] + weight
      else:
        target2weight[target] = weight

    # Prediction is the target value with maximum weighted sum
    prediction = -1
    max_weighted_sum = -1
    for target in target2weight:
      if target2weight[target] > max_weighted_sum:
        prediction = target
        max_weighted_sum = target2weight[target]



    # 7. If regression (choice_fn = mean), return the average of the K labels
    # 8. If classification (choice_fn = mode), return the mode of the K labels
    return k_nearest_distances_and_indices , prediction

In [42]:
clf_data = [
    ((4,2),1),
    ((2,4),1),
    ((6,4),1),
    ((4,6),1),
    ((6,2),0),
    ((4,4),0)
]

clf_query = [(6,6)]
clf_k_nearest_neighbors, clf_prediction = knn(
    clf_data, clf_query, k=3
)

In [43]:
clf_k_nearest_neighbors

[(0.0, 2), (0.0, 4), (2.0, 0)]

In [44]:
clf_prediction

1

In [45]:
clf_k_nearest_neighbors, clf_prediction = knn_weighted_sum(
    clf_data, clf_query, k=3
)

[(0.0, 0), (0.0, 1), (2.0, 0)]


In [46]:
clf_k_nearest_neighbors

[(0.0, 0), (0.0, 1), (2.0, 0)]

In [47]:
clf_prediction

0