In [5]:
# Example of calculating Euclidean distance
from math import sqrt
'''
https://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/
'''

'\nhttps://machinelearningmastery.com/tutorial-to-implement-k-nearest-neighbors-in-python-from-scratch/\n'

Calculate the straight-line distance between two vectors using the Euclidean distance measure.
- It is calculated as the square root of the sum of the squared differences between the two vectors.
- Euclidean distance: $d(x_1, x_2) = \sqrt{\sum_{i=1}^{N} (x_{1,i} - x_{2,i})^2}$
- $x_1$ is the first row of data, $x_2$ is the second row of data, and $i$ is the index of a specific column as we sum across all $N$ feature columns (the last column of each row holds the class label and is left out of the sum).


In [12]:
# calculate the Euclidean distance between two vectors
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between two rows, ignoring the last column.

    Only indexes ``0 .. len(row1) - 2`` take part in the sum; the final
    element of ``row1`` (the class label in this notebook's dataset) is
    skipped. ``row2`` is read at the same indexes, so it must be at least
    that long.

    Args:
        row1: Indexable sequence of numbers; its length decides how many
            columns are compared.
        row2: Indexable sequence of numbers compared against ``row1``.

    Returns:
        The square root of the summed squared per-column differences, as a
        float (``0.0`` when there are no columns to compare).
    """
    feature_count = len(row1) - 1
    squared_diffs = (
        (row1[col] - row2[col]) ** 2 for col in range(feature_count)
    )
    # Start the sum at 0.0 so the accumulation is float-based from the start.
    return sqrt(sum(squared_diffs, 0.0))

# locate neighbors
def get_neighbors(train, test_row, num_neighbors):
    """Return the ``num_neighbors`` rows of ``train`` closest to ``test_row``.

    Closeness is measured with ``euclidean_distance(test_row, train_row)``.
    Rows at equal distance keep their relative order from ``train`` because
    the sort is stable.

    Args:
        train: Iterable of rows to search.
        test_row: The row to measure distances from.
        num_neighbors: How many of the closest rows to return.

    Returns:
        A list of rows from ``train``, nearest first.

    Raises:
        IndexError: If ``num_neighbors`` is larger than the number of rows
            in ``train``.
    """
    # Pair every candidate with its distance, then order by distance only
    # (the rows themselves are never compared).
    ranked = sorted(
        ((euclidean_distance(test_row, candidate), candidate) for candidate in train),
        key=lambda pair: pair[0],
    )
    return [ranked[rank][1] for rank in range(num_neighbors)]

# Make a classification prediction with neighbors
def predict_classification(train, test_row, num_neighbors):
    """Predict the class of ``test_row`` by majority vote of its nearest neighbors.

    The neighbors come from ``get_neighbors(train, test_row, num_neighbors)``
    and each one votes with its last element (``row[-1]``). The label with the
    most votes wins. When several labels tie, the one belonging to the
    closest neighbor among them is returned, so the result is deterministic
    and does not depend on set/hash iteration order.

    Args:
        train: Iterable of rows whose last element is the class label.
        test_row: The row to classify.
        num_neighbors: Number of nearest neighbors that vote.

    Returns:
        The winning class label (labels must be hashable).

    Raises:
        ValueError: If no neighbor is selected (for example
            ``num_neighbors`` is 0), so there is nothing to vote on.
    """
    neighbors = get_neighbors(train, test_row, num_neighbors)
    output_values = [row[-1] for row in neighbors]
    if not output_values:
        raise ValueError("num_neighbors must select at least one neighbor to vote")
    # dict.fromkeys de-duplicates while keeping first-seen (nearest-first)
    # order, and max() returns the first maximal item, so ties go to the
    # label of the closest neighbor instead of an arbitrary set order.
    distinct_labels = dict.fromkeys(output_values)
    prediction = max(distinct_labels, key=output_values.count)
    return prediction

In [13]:
# Test distance function
# Small hand-written dataset: each row is two feature values followed by a
# class label (0 or 1) in the last position.
dataset = [
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1],
]

# Measure every row against the first one; the first printed distance is the
# row compared with itself, so it is 0.0.
row0 = dataset[0]
for row in dataset:
    distance = euclidean_distance(row0, row)
    print('distance between 2 points: {}'.format(distance))

distance between 2 points: 0.0
distance between 2 points: 1.3290173915275787
distance between 2 points: 1.9494646655653247
distance between 2 points: 1.5591439385540549
distance between 2 points: 0.5356280721938492
distance between 2 points: 4.850940186986411
distance between 2 points: 2.592833759950511
distance between 2 points: 4.214227042632867
distance between 2 points: 6.522409988228337
distance between 2 points: 4.985585382449795


In [11]:
# List the 3 rows nearest to the first row. That row is itself part of the
# dataset being searched, so it is returned first (distance 0).
neighbors = get_neighbors(dataset, dataset[0], num_neighbors=3)
for neighbor in neighbors:
    print(neighbor)

[2.7810836, 2.550537003, 0]
[3.06407232, 3.005305973, 0]
[1.465489372, 2.362125076, 0]


In [14]:
# Classify the first row with a 3-neighbor vote and compare the result with
# the label stored in its last column.
expected_label = dataset[0][-1]
prediction = predict_classification(dataset, dataset[0], num_neighbors=3)
print('Expected %d, Got %d.' % (expected_label, prediction))

Expected 0, Got 0.
