In [1]:
from math import sqrt, pow, exp
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from statistics import mode

In [2]:
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        point1: sequence of numeric coordinates.
        point2: sequence of numeric coordinates with the same length as
            ``point1``.

    Returns:
        float: square root of the summed squared coordinate differences
        (``0.0`` for two empty points).

    Raises:
        ValueError: if the two points do not have the same number of
            coordinates.
    """
    # Without this check, extra coordinates in point2 would be silently
    # ignored and a distance in the wrong space would be returned.
    if len(point1) != len(point2):
        raise ValueError(
            "points must have the same number of dimensions, got {} and {}".format(
                len(point1), len(point2)
            )
        )

    sum_squared_distance = 0
    for coordinate1, coordinate2 in zip(point1, point2):
        sum_squared_distance += pow(coordinate1 - coordinate2, 2)
    return sqrt(sum_squared_distance)

In [11]:
def knn(data, query, k):
    """Find the k nearest neighbours of ``query`` and their majority label.

    Args:
        data: indexable collection of examples; in each example every element
            except the last is a feature and the last element is the label.
        query: feature values of the point to classify (no label).
        k: number of neighbours to use; must be at least 1. If ``k`` exceeds
            ``len(data)``, all examples are used.

    Returns:
        tuple: ``(neighbours, label)`` where ``neighbours`` is a list of
        ``(distance, index)`` pairs sorted by ascending distance (ties broken
        by ascending index) and ``label`` is ``statistics.mode`` of the
        neighbours' labels.

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``data`` is empty.
    """
    # A negative k would otherwise slice from the end of the sorted list and
    # quietly return the wrong number of neighbours; k == 0 or empty data
    # would only fail later inside mode() with an unrelated message.
    if k < 1:
        raise ValueError("k must be at least 1, got {!r}".format(k))
    if len(data) == 0:
        raise ValueError("data must contain at least one example")

    neighbor_distances_and_indices = [
        (euclidean_distance(example[:-1], query), index)
        for index, example in enumerate(data)
    ]

    k_nearest_distances_and_indices = sorted(neighbor_distances_and_indices)[:k]
    k_nearest_labels = [data[index][-1] for _, index in k_nearest_distances_and_indices]

    return k_nearest_distances_and_indices, mode(k_nearest_labels)


In [12]:
# Toy 2-D dataset. Every row is [x, y, label]: the leading values are the
# features and the final element is the class label.
training_data = [
    [x, y, label]
    for label, points in (
        ('blue', [(4, 4), (6, 2)]),
        ('orange', [(2, 4), (6, 4), (4, 6), (4, 2)]),
    )
    for x, y in points
]

# Feature values of the single query point to classify (it carries no label).
test_data = [6, 6]

In [13]:
# Classify the query point from its 3 nearest training examples; knn returns
# the (distance, index) pairs of those neighbours and their majority label.
clf_k_nearest_neighbors, clf_prediction = knn(training_data, test_data, k=3)

In [6]:
# Majority-vote label returned by knn for the query point.
clf_prediction

'orange'

In [16]:
# The selected neighbours as (distance, index-into-training_data) pairs,
# sorted by ascending distance.
clf_k_nearest_neighbors

[(2.0, 3), (2.0, 4), (2.8284271247461903, 0)]

In [17]:
# Label of the closest neighbour: [0] picks the nearest (distance, index)
# pair, [1] is its row index in training_data, and [2] is the label column.
training_data[clf_k_nearest_neighbors[0][1]][2]

'orange'

In [31]:
def predict_weighted_knn(neighbours, data=None):
    """Predict a label by inverse-distance-weighted voting among neighbours.

    Each neighbour votes for its own label with weight ``1 / distance``, so
    closer neighbours count more. If one or more neighbours lie at distance 0
    (the query coincides with a training point), only those exact matches
    vote, each with weight 1, instead of dividing by zero.

    Args:
        neighbours: iterable of ``(distance, index)`` pairs, such as the first
            value returned by ``knn``; ``index`` selects a row of ``data``.
        data: rows whose last element is the class label. Defaults to the
            notebook-level ``training_data``.

    Returns:
        The label with the largest accumulated weight. The per-class totals
        are also printed, sorted by ascending weight.

    Raises:
        ValueError: if ``neighbours`` is empty.
    """
    if data is None:
        data = training_data

    neighbours = list(neighbours)
    if not neighbours:
        raise ValueError("neighbours must contain at least one (distance, index) pair")

    # 1 / 0 is undefined, so exact matches take over the vote when present.
    exact_matches = [neighbour for neighbour in neighbours if neighbour[0] == 0]
    if exact_matches:
        weighted_votes = [(1.0, neighbour[1]) for neighbour in exact_matches]
    else:
        weighted_votes = [(1 / neighbour[0], neighbour[1]) for neighbour in neighbours]

    class_count = {}
    for weight, index in weighted_votes:
        label = data[index][-1]
        class_count[label] = class_count.get(label, 0) + weight

    # Ascending sort, so the winning class is the last entry.
    sorted_class_count = sorted(class_count.items(), key=lambda item: item[1])
    print("Class count : ", sorted_class_count)
    return sorted_class_count[-1][0]

In [32]:
# Re-classify the same query with inverse-distance-weighted voting over the
# neighbours returned by knn.
predict_weighted_knn(clf_k_nearest_neighbors)

Class count :  [('blue', 0.35355339059327373), ('orange', 1.0)]


'orange'

In [35]:
def predict_locally_weighted_knn(neighbours, data=None, decay=2):
    """Predict a label by exponentially distance-weighted voting.

    Each neighbour votes for its own label with weight
    ``1 / exp(decay * distance)``, so influence falls off exponentially with
    distance. A neighbour so far away that ``exp`` overflows contributes a
    weight of 0.0 instead of raising ``OverflowError``.

    Args:
        neighbours: iterable of ``(distance, index)`` pairs, such as the first
            value returned by ``knn``; ``index`` selects a row of ``data``.
        data: rows whose last element is the class label. Defaults to the
            notebook-level ``training_data``.
        decay: multiplier applied to the distance inside the exponential;
            expected to be non-negative. Defaults to 2.

    Returns:
        The label with the largest accumulated weight. The per-class totals
        are also printed, sorted by ascending weight.

    Raises:
        ValueError: if ``neighbours`` is empty.
    """
    if data is None:
        data = training_data

    neighbours = list(neighbours)
    if not neighbours:
        raise ValueError("neighbours must contain at least one (distance, index) pair")

    class_count = {}
    for neighbour in neighbours:
        label = data[neighbour[1]][-1]
        try:
            weight = 1 / exp(decay * neighbour[0])
        except OverflowError:
            # math.exp overflows for large arguments; such a neighbour's
            # weight is indistinguishable from zero anyway.
            weight = 0.0
        class_count[label] = class_count.get(label, 0) + weight

    # Ascending sort, so the winning class is the last entry.
    sorted_class_count = sorted(class_count.items(), key=lambda item: item[1])
    print("Class count : ", sorted_class_count)
    return sorted_class_count[-1][0]

In [36]:
# Re-classify the same query with exponentially decaying distance weights
# over the neighbours returned by knn.
predict_locally_weighted_knn(clf_k_nearest_neighbors)

Class count :  [('blue', 0.0034934892766462005), ('orange', 0.036631277777468364)]


'orange'