<a href="https://colab.research.google.com/github/pysaurav/ML-Track/blob/master/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
from math import sqrt
import numpy as np
from scipy.spatial import distance

In [0]:
# Load the dataset from 'knnData.csv' (a relative path, so it is resolved against
# the notebook's current working directory). Later cells read the columns
# trainPoints_x1, trainPoints_x2, trainLabel, testPoints_x1, testPoints_x2 and
# testLabel from this frame.
df = pd.read_csv('knnData.csv')
# Last expression of the cell: shows the first rows as a quick sanity check.
df.head()

In [0]:
# Column triples (x1, x2, label) that make up the training and the test set.
TRAIN_COLUMNS = ['trainPoints_x1', 'trainPoints_x2', 'trainLabel']
TEST_COLUMNS = ['testPoints_x1', 'testPoints_x2', 'testLabel']


def _rows_as_lists(frame, columns):
    """Return the selected columns of `frame` as a list of per-row lists.

    Each inner list holds the values of `columns` for one row, in the order
    given, so the label ends up as the last element (the layout the KNN
    helpers below rely on via `row[:2]` and `row[-1]`).

    Rows that have a missing value in any of the selected columns are
    dropped: a NaN coordinate makes every distance NaN and a NaN label can
    never match a prediction.
    NOTE(review): this matters e.g. if the train and test columns in the CSV
    have different lengths (pandas pads the shorter ones with NaN) -- cannot
    be confirmed from the notebook alone; with complete data it is a no-op.
    """
    complete_rows = frame[columns].dropna()
    # itertuples avoids building a Series per row (as iterrows does) and
    # keeps each column's own dtype instead of upcasting the whole row.
    return [list(record) for record in complete_rows.itertuples(index=False, name=None)]


train_data = _rows_as_lists(df, TRAIN_COLUMNS)
test_data = _rows_as_lists(df, TEST_COLUMNS)

In [0]:
# Helper functions that implement the k-nearest-neighbours (KNN) classifier


def nearest_neighbors(train, test_row, num_neighbors, distance_measure):
    """Return the rows of `train` that are closest to `test_row`.

    Parameters:
        train: iterable of training rows.
        test_row: the row whose neighbours are wanted.
        num_neighbors: maximum number of rows to return. If it exceeds the
            number of training rows, all rows are returned (instead of
            raising IndexError); zero or a negative value yields an empty list.
        distance_measure: callable invoked as
            `distance_measure(test_row, train_row)` returning a sortable
            distance.

    Returns:
        A list of training rows ordered from nearest to farthest. Rows at
        equal distance keep their relative order from `train` (stable sort).
    """
    # Clamp so a negative count behaves like the original `range()` loop
    # (no neighbours) rather than slicing from the end of the list.
    limit = max(num_neighbors, 0)
    ranked = sorted(train, key=lambda train_row: distance_measure(test_row, train_row))
    # Slicing tolerates limit > len(ranked), unlike indexing row by row.
    return ranked[:limit]

def predict_classification(train, test_row, num_neighbors, distance_measure):
    """Predict the label of `test_row` by majority vote among its neighbours.

    The neighbours come from `nearest_neighbors(train, test_row,
    num_neighbors, distance_measure)`; the last element of each neighbour
    row is taken as its label.

    Returns:
        The most frequent label among the neighbours. When several labels
        are tied, the one that occurs first in the neighbour list wins, so
        the result no longer depends on set iteration order.

    Raises:
        ValueError: if there are no neighbours to vote (e.g. empty `train`).
    """
    # Function-scope import keeps this cell self-contained.
    from collections import Counter

    neighbors = nearest_neighbors(train, test_row, num_neighbors, distance_measure)
    votes = Counter(row[-1] for row in neighbors)
    if not votes:
        raise ValueError('cannot predict a class without any neighbors')
    # most_common keeps first-encountered order among equal counts.
    prediction, _ = votes.most_common(1)[0]
    return prediction

def accuracy_metric(actual, predicted):
    """Return the percentage of positions where `predicted` matches `actual`.

    Parameters:
        actual: sequence of true labels.
        predicted: sequence of predicted labels, indexed in parallel with
            `actual`; it must be at least as long as `actual`.

    Returns:
        A float between 0.0 and 100.0. An empty `actual` gives 0.0 instead
        of raising ZeroDivisionError.
    """
    total = len(actual)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(1 for i in range(total) if actual[i] == predicted[i])
    return correct / float(total) * 100.0

def k_nearest_neighbors(train, test, num_neighbors, distance_measure):
    """Classify every row of `test` against `train`.

    Each test row is passed to `predict_classification` together with
    `train`, `num_neighbors` and `distance_measure`.

    Returns:
        A list with one predicted label per test row, in the order of `test`.
    """
    return [
        predict_classification(train, test_row, num_neighbors, distance_measure)
        for test_row in test
    ]

In [0]:
# Accuracy using the L2 norm (Euclidean distance)


def L2_norm_distance(row1, row2):
    """Euclidean (L2) distance between the first two entries of each row.

    Only `row[:2]` is compared; anything after the first two elements
    (the label, for the rows built in this notebook) is ignored.
    """
    point_a, point_b = row1[:2], row2[:2]
    return distance.euclidean(point_a, point_b)

# Evaluate the classifier on the test rows with k = 3 and the Euclidean (L2) metric.
num_neighbors = 3
actual = [row[-1] for row in test_data]
predicted = k_nearest_neighbors(train_data, test_data, num_neighbors, L2_norm_distance)
accuracy = accuracy_metric(actual, predicted)
# Only one run is recorded, so the "mean" printed below equals this accuracy.
scores = [accuracy]
print('Mean Accuracy using L2 Norm: %.2f%%' % (sum(scores) / float(len(scores))))


In [0]:
# Accuracy using the L1 norm (city-block / Manhattan distance)


def L1_norm_distance(row1, row2):
    """City-block (L1) distance between the first two entries of each row.

    Only `row[:2]` is compared; anything after the first two elements
    (the label, for the rows built in this notebook) is ignored.
    """
    point_a, point_b = row1[:2], row2[:2]
    return distance.cityblock(point_a, point_b)

# Evaluate the classifier on the test rows with k = 3 and the city-block (L1) metric.
num_neighbors = 3
actual = [row[-1] for row in test_data]
predicted = k_nearest_neighbors(train_data, test_data, num_neighbors, L1_norm_distance)
accuracy = accuracy_metric(actual, predicted)
# Only one run is recorded, so the "mean" printed below equals this accuracy.
scores = [accuracy]
print('Mean Accuracy using L1 Norm: %.2f%%' % (sum(scores) / float(len(scores))))

In [0]:
# Accuracy using the L-infinity norm (Chebyshev distance)


def L_infinite_norm_distance(row1, row2):
    """Chebyshev (L-infinity) distance between the first two entries of each row.

    Only `row[:2]` is compared; anything after the first two elements
    (the label, for the rows built in this notebook) is ignored.
    """
    point_a, point_b = row1[:2], row2[:2]
    return distance.chebyshev(point_a, point_b)

# Evaluate the classifier on the test rows with k = 3 and the Chebyshev (L-infinity) metric.
num_neighbors = 3
actual = [row[-1] for row in test_data]
predicted = k_nearest_neighbors(train_data, test_data, num_neighbors, L_infinite_norm_distance)
accuracy = accuracy_metric(actual, predicted)
# Only one run is recorded, so the "mean" printed below equals this accuracy.
scores = [accuracy]
print('Mean Accuracy using L-infinite Norm: %.2f%%' % (sum(scores) / float(len(scores))))