In [11]:
import numpy as np
import os
from collections import Counter
from classifier_utils import *

# get most common class in the instances
def vote(neighbors):
    """Return the most frequent class among ``neighbors``.

    The class of each neighbor is read from its last element
    (``neighbor[-1]``).  Raises ``IndexError`` when ``neighbors`` is empty,
    because there is no most-common entry to pick.
    """
    # Tally the last field of every neighbor, e.g. Counter({0.0: 4, 1.0: 1}).
    tally = Counter(entry[-1] for entry in neighbors)
    # most_common(1) yields a one-element list of (class, count) pairs.
    (winner, _count), = tally.most_common(1)[:1] or [tally.most_common(1)[0]]
    return winner

# get distance between 2 instances
def distance(instance1, instance2):
    """Return the Euclidean (2-norm) distance between two instances.

    Parameters
    ----------
    instance1, instance2 : array-like
        Numeric sequences of equal length (lists, tuples or numpy arrays).

    Returns
    -------
    float
        ``sqrt(sum((instance1 - instance2) ** 2))``.
    """
    # Convert up front so plain lists and tuples support element-wise subtraction.
    instance1 = np.asarray(instance1)
    instance2 = np.asarray(instance2)
    # np.linalg.norm on a 1-D array defaults to the 2-norm.
    return np.linalg.norm(instance1 - instance2)

# get neighbors
def getNeighbors(trainingSet, 
                  testInstance, 
                  k, 
                  distance=distance):
    """Return the ``k`` training instances closest to ``testInstance``.

    Parameters
    ----------
    trainingSet : sequence of instances
        The last element of each instance is stored as its class.
    testInstance : array-like
        Instance to find neighbors for.
    k : int
        Number of neighbors to keep.
    distance : callable
        ``distance(testInstance, trainInstance)`` returning a number.

    Returns
    -------
    list of tuple
        Up to ``k`` tuples ``(instance, distance, class of instance)``,
        ordered from nearest to farthest.
    """
    # NOTE(review): complete rows are handed to `distance`, including the last
    # element that is recorded as the class below -- confirm whether the label
    # column should be excluded from the distance computation.
    distances = []
    for trainInstance in trainingSet:
        dist = distance(testInstance, trainInstance)
        # (instance, distance, class of instance)
        distances.append((trainInstance, dist, trainInstance[-1]))

    # Sort on the distance field only: comparing whole tuples would order by
    # the instance rows first. list.sort is stable, so ties keep training order.
    distances.sort(key=lambda entry: entry[1])
    # Slicing copes with k larger than the number of training instances.
    return distances[:k]

def getPredictionsKNN(trainingSet, testSet, k):
    """Classify every instance in ``testSet`` with plain (unweighted) KNN.

    For each test instance the ``k`` nearest training instances are fetched
    with ``getNeighbors`` and the most common class among them, as chosen by
    ``vote``, becomes the prediction.  Returns the predictions as a list in
    the same order as ``testSet``.
    """
    return [
        vote(getNeighbors(trainingSet, testInstance, k, distance=distance))
        for testInstance in testSet
    ]


def function_i(label):
    """Map a class label to a signed vote: ``1`` for label 1, ``-1`` otherwise."""
    if label == 1:
        return 1
    return -1

def getPredictionsWeightedKNN(trainingSet, testSet, k):
    """Classify every instance in ``testSet`` with distance-weighted KNN.

    Each of the ``k`` nearest neighbors casts a signed vote ``f_i``
    (``function_i(label)``: +1 for label 1, -1 otherwise) weighted by
    ``w_i = 1 / d_ij``.  The score is ``f_j = sum(f_i * w_i) / sum(w_i)``;
    the prediction is 1 when ``f_j > 0`` and 0 otherwise.

    Neighbors at distance 0 would have infinite weight, so when any exist
    they alone decide the outcome, each with weight 1.

    Returns
    -------
    list
        One prediction (0 or 1) per test instance, in ``testSet`` order.

    Raises
    ------
    ZeroDivisionError
        If no neighbors are returned for an instance (for example ``k == 0``
        or an empty ``trainingSet``), because the total weight is then 0.
    """
    predictions = []
    for testInstance in testSet:
        # Each neighbor is a tuple (instance, distance, class of instance).
        neighbors = getNeighbors(trainingSet, testInstance, k, distance=distance)

        exactMatches = [neighbor for neighbor in neighbors if neighbor[1] == 0]
        if exactMatches:
            # Avoid 1/0: let the exact matches vote among themselves equally.
            weightedVotes = [(function_i(label), 1.0)
                             for _, _, label in exactMatches]
        else:
            weightedVotes = [(function_i(label), 1.0 / d_ij)
                             for _, d_ij, label in neighbors]

        # W = Sum(w_i)
        W = sum(w_i for _, w_i in weightedVotes)
        # f_j = Sum(f_i * w_i) / W
        f_j = sum(f_i * w_i for f_i, w_i in weightedVotes) / W

        # Determine the label from the sign of f_j; a tie (f_j == 0) maps to 0.
        prediction = 1 if f_j > 0 else 0
        predictions.append(prediction)

    return predictions


In [12]:
# Load the CSV data set that sits next to the notebook
filePath = './data.csv'
dataset = loadCsv(filePath)

# Partition the rows into trainingSet and testSet according to splitRatio
splitRatio = 0.67
trainingSet, testSet = splitDataset(dataset, splitRatio)
print('Split {0} rows into train={1} and test={2} rows'.format(
    *map(len, (dataset, trainingSet, testSet))))

Split 768 rows into train=514 and test=254 rows


In [14]:
# Number of neighbors used by both classifiers in this cell
K = 1

# Plain KNN: predict, score, report
predictionsKNN = getPredictionsKNN(trainingSet, testSet, K)
accuracy = getAccuracy(testSet, predictionsKNN)
print('KNN Classifier, K = {0}'.format(K),
      'KNN Accuracy: {0}%'.format(accuracy),
      sep='\n')

# Distance-weighted KNN: predict, score, report
predictionsWeightedKNN = getPredictionsWeightedKNN(trainingSet, testSet, K)
weighted_accuracy = getAccuracy(testSet, predictionsWeightedKNN)
print('\n\nWeighted KNN Classifier, K = {0}'.format(K),
      'Weighted KNN Accuracy: {0}%'.format(weighted_accuracy),
      sep='\n')

KNN Classifier, K = 1
KNN Accuracy: 66.14173228346458%


Weighted KNN Classifier, K = 1
Weighted KNN Accuracy: 66.14173228346458%


In [15]:
# Number of neighbors used by both classifiers in this cell
K = 20

# Plain KNN: predict, score, report
predictionsKNN = getPredictionsKNN(trainingSet, testSet, K)
accuracy = getAccuracy(testSet, predictionsKNN)
print('KNN Classifier, K = {0}'.format(K),
      'KNN Accuracy: {0}%'.format(accuracy),
      sep='\n')

# Distance-weighted KNN: predict, score, report
predictionsWeightedKNN = getPredictionsWeightedKNN(trainingSet, testSet, K)
weighted_accuracy = getAccuracy(testSet, predictionsWeightedKNN)
print('\n\nWeighted KNN Classifier, K = {0}'.format(K),
      'Weighted KNN Accuracy: {0}%'.format(weighted_accuracy),
      sep='\n')

KNN Classifier, K = 20
KNN Accuracy: 74.80314960629921%


Weighted KNN Classifier, K = 20
Weighted KNN Accuracy: 76.37795275590551%
