## KNN From Scratch

### Reading data

In [1]:
import numpy as np
import pandas as pd
from math import sqrt

# reading datas
df = pd.read_table('../dataset/occupancy/datatraining.txt', skiprows=1, names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'), sep=',')
test1 = pd.read_table('../dataset/occupancy/datatest.txt', skiprows=1, names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'), sep=',')
test2 = pd.read_table('../dataset/occupancy/datatest2.txt', skiprows=1, names=('A', 'B', 'C', 'D', 'E', 'F', 'Occ'), sep=',')

columns = ['B', 'C', 'D', 'E', 'F', 'Occ']

df = df[columns]
df.head()

Unnamed: 0,B,C,D,E,F,Occ
1,23.18,27.272,426.0,721.25,0.004793,1
2,23.15,27.2675,429.5,714.0,0.004783,1
3,23.15,27.245,426.0,713.5,0.004779,1
4,23.15,27.2,426.0,708.25,0.004772,1
5,23.1,27.2,426.0,704.5,0.004757,1


In [2]:
test1_X = test1[['B', 'C', 'D', 'E', 'F']]
test1_Y = test1['Occ']

### Euclidean Distance

In [3]:
def Euclidean_dist(a, b):
    dis = 0
    for i in list(a.columns):
        dis += (float(a[i])-float(b[i]))**2
    return sqrt(dis)

### Chebyshev Distance

In [4]:
def Chebysehvs_dist(a, b):
    dis = list()
    for i in list(a.columns):
        dis.append(abs(float(a[i])-float(b[i])))
    return max(dis)

### Classification using KNN

In [5]:
Y = list()
for i in range(0,len(test1_X)):
    sample = test1_X[i:i+1]
    dis = list()
    kn = list()
    l = list()
    for j in range(0,len(df)):
        dis.append(Chebysehvs_dist(sample, df[j:j+1]))
    for j in range(5):
        mx = dis.index(min(dis))
        kn.append(int(df[mx:mx+1]['Occ']))
        dis[mx] = 0
    cl0 = kn.count(0)
    cl1 = kn.count(1)
#     print(i, kn)
    if cl0>=cl1:
        Y.append(0)
    else:
        Y.append(1)

print(Y)

[0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 

### Calculating Accuracy

In [6]:
y_out = np.array(Y)
gr_val = np.array(test1_Y)

tp_bool = np.logical_and((y_out==1),(gr_val==1))
tn_bool = np.logical_and((y_out==0),(gr_val==0))
fp_bool = np.logical_and((y_out==1),(gr_val==0))
fn_bool = np.logical_and((y_out==0),(gr_val==1))
tp = len(y_out[tp_bool])
tn = len(y_out[tn_bool])
fp = len(y_out[fp_bool])
fn = len(y_out[fn_bool])
print(tp,tn,fp,fn)

accuracy = (tp+tn)/(tp+tn+fp+fn)
print("accuracy : "+str(accuracy))

# precision = tp/(total yes predictions)
precision = tp/(len(y_out[y_out==1]))
print("precision : "+str(precision))

specificity = tp/(tp+fn)
print("specificity : "+str(specificity))

sensitivity = tn/(tn+fp)
print("sensitivity : "+str(sensitivity))

195 435 23 8
accuracy : 0.9531013615733737
precision : 0.8944954128440367
specificity : 0.9605911330049262
sensitivity : 0.9497816593886463
