# k-neighbors Classifier

In [88]:
import numpy as np
import pandas as pd
import seaborn as sns

In [89]:
df=pd.read_csv('iris.csv')
df

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [90]:
df.variety.value_counts()

Setosa        50
Versicolor    50
Virginica     50
Name: variety, dtype: int64

In [91]:
df.dtypes

sepal.length    float64
sepal.width     float64
petal.length    float64
petal.width     float64
variety          object
dtype: object

In [92]:
df.variety=df.variety.astype('category')

In [93]:
df.variety=df.variety.cat.codes
df

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [94]:
np.random.seed(50)

In [95]:
# Splitting the data
def splitting(mydata, ratio):
    train_num = int(len(mydata) * ratio)
    train_index=np.random.choice(range(0,len(mydata)),replace=False,size=train_num)
    train=mydata.iloc[train_index]
    test=mydata[~mydata.index.isin(train_index)]
    return train,test

In [96]:
train,test=splitting(mydata=df, ratio=0.8)

In [97]:
train

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
88,5.6,3.0,4.1,1.3,1
72,6.3,2.5,4.9,1.5,1
20,5.4,3.4,1.7,0.2,0
16,5.4,3.9,1.3,0.4,0
147,6.5,3.0,5.2,2.0,2
...,...,...,...,...,...
96,5.7,2.9,4.2,1.3,1
76,6.8,2.8,4.8,1.4,1
66,5.6,3.0,4.5,1.5,1
121,5.6,2.8,4.9,2.0,2


In [98]:
test

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
2,4.7,3.2,1.3,0.2,0
6,4.6,3.4,1.4,0.3,0
19,5.1,3.8,1.5,0.3,0
22,4.6,3.6,1.0,0.2,0
26,5.0,3.4,1.6,0.4,0
30,4.8,3.1,1.6,0.2,0
31,5.4,3.4,1.5,0.4,0
33,5.5,4.2,1.4,0.2,0
35,5.0,3.2,1.2,0.2,0
43,5.0,3.5,1.6,0.6,0


In [99]:
test.variety.value_counts()

0    11
2    11
1     8
Name: variety, dtype: int64

In [100]:
# calculate the Euclidean distance between two vectors be aware that the last element is for the class
def euclidean_distance(row1, row2):
    return np.sqrt(np.sum((row1[:-1] - row2[:-1])**2))

In [101]:
A=test.iloc[12,:]
A

sepal.length    5.9
sepal.width     3.2
petal.length    4.8
petal.width     1.8
variety         1.0
Name: 70, dtype: float64

In [102]:
B=test.iloc[11,:]
B

sepal.length    5.6
sepal.width     2.9
petal.length    3.6
petal.width     1.3
variety         1.0
Name: 64, dtype: float64

In [103]:
euclidean_distance(A, B)

1.3674794331177342

In [104]:
# Locate the most similar neighbors
def get_neighbors(train, test_instance, num_neighbors):
    distances = []
    train_copy=train.copy()
    for j in range(len(train)):
        dist = euclidean_distance(test_instance, train.iloc[j,:])
        distances.append(dist)
    train_copy['distance']=distances
    return train_copy.nsmallest(num_neighbors, ['distance'])

In [105]:
neighbors=get_neighbors(train, A, num_neighbors=5)
neighbors

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety,distance
138,6.0,3.0,4.8,1.8,2,0.223607
127,6.1,3.0,4.9,1.8,2,0.3
149,5.9,3.0,5.1,1.8,2,0.360555
56,6.3,3.3,4.7,1.6,1,0.469042
126,6.2,2.8,4.8,1.8,2,0.5


In [106]:
neighbors.variety.value_counts()

2    4
1    1
Name: variety, dtype: int64

In [107]:
neighbors.variety.value_counts().index[0]

2

In [108]:
# Sınıf Tahmininde bulunma
def predict_classes(train, test, num_neighbors):
    prediction=[]
    for j in range(len(test)):
        neighbors = get_neighbors(train, test.iloc[j,:], num_neighbors)
        predicted_class = neighbors.variety.value_counts().index[0]
        prediction.append(predicted_class)
    return prediction

In [109]:
prediction=predict_classes(train, test, num_neighbors=5)
prediction

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2]

In [110]:
list(zip(prediction,test.variety))

[(0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (0, 0),
 (1, 1),
 (2, 1),
 (1, 1),
 (1, 1),
 (1, 1),
 (1, 1),
 (1, 1),
 (1, 1),
 (2, 2),
 (1, 2),
 (2, 2),
 (2, 2),
 (2, 2),
 (2, 2),
 (2, 2),
 (2, 2),
 (2, 2),
 (2, 2),
 (2, 2)]

In [111]:
def compute_accuracy(prediction,test_y):
    return np.mean(prediction==test_y)

In [112]:
compute_accuracy(prediction,test.variety)

0.9333333333333333

In [113]:
prediction=predict_classes(train, test, num_neighbors=1)
compute_accuracy(prediction,test.variety)

0.9

In [114]:
prediction=predict_classes(train, test, num_neighbors=2)
compute_accuracy(prediction,test.variety)

0.9

In [115]:
prediction=predict_classes(train, test, num_neighbors=10)
compute_accuracy(prediction,test.variety)

0.9

In [116]:
prediction=predict_classes(train, test, num_neighbors=75)
compute_accuracy(prediction,test.variety)

0.8666666666666667

In [117]:
prediction=predict_classes(train, test, num_neighbors=85)
compute_accuracy(prediction,test.variety)

0.3

WKNN

In [127]:
def get_neighbors(train, test_instance, num_neighbors):
    distances = []
    train_copy=train.copy()
    for j in range(len(train)):
        dist = euclidean_distance(test_instance, train.iloc[j,:])
        distances.append(dist)
    train_copy['distance']=distances
    train_copy['weights']=1/train_copy['distance']
    return train_copy.nsmallest(num_neighbors, ['distance'])

get_neighbors(df, A, 5)

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety,distance,weights
70,5.9,3.2,4.8,1.8,1,0.0,inf
138,6.0,3.0,4.8,1.8,2,0.223607,4.472136
127,6.1,3.0,4.9,1.8,2,0.3,3.333333
149,5.9,3.0,5.1,1.8,2,0.360555,2.773501
85,6.0,3.4,4.5,1.6,1,0.424264,2.357023


In [119]:
def predict_classes(train, test, num_neighbors):
    prediction=[]
    for j in range(len(test)):
        neighbors = get_neighbors(train, test.iloc[j,:], num_neighbors)
        Class_Weight_Sums=[]
        for i in np.unique(neighbors.variety):
            Class_Weight_Sums.append((i,np.sum(neighbors[neighbors.variety==i].weights)))
            print(Class_Weight_Sums)
        predicted_class = max(Class_Weight_Sums, key=lambda x:x[1])[0]
        prediction.append(predicted_class)
    return np.array(prediction)

In [124]:
def Weighted_KNN(train,test,number_neighbors):
    test_y=test.variety
    predictions=predict_classes(train, test, number_neighbors)
    accuracy=compute_accuracy(predictions,test_y)
    return accuracy,predictions
Weighted_KNN(train, test, 85)

[(0, 89.3716708269923)]
[(0, 89.3716708269923), (1, 11.85986159578993)]
[(0, 89.3716708269923), (1, 11.85986159578993), (2, 1.4942217613118403)]
[(0, 82.66391488608022)]
[(0, 82.66391488608022), (1, 11.77972560016527)]
[(0, 82.66391488608022), (1, 11.77972560016527), (2, 1.488567528072474)]
[(0, 53.408420968299495)]
[(0, 53.408420968299495), (1, 12.958263870892468)]
[(0, 53.408420968299495), (1, 12.958263870892468), (2, 1.3649466724809296)]
[(0, 68.1315569424076)]
[(0, 68.1315569424076), (1, 11.774586624728403)]
[(0, 68.1315569424076), (1, 11.774586624728403), (2, 1.2448983761345853)]
[(0, 81.93475735372496)]
[(0, 81.93475735372496), (1, 11.884882891450461)]
[(0, 81.93475735372496), (1, 11.884882891450461), (2, 1.497535839547001)]
[(0, 87.0556256151596)]
[(0, 87.0556256151596), (1, 11.400757942557771)]
[(0, 87.0556256151596), (1, 11.400757942557771), (2, 1.45430747577365)]
[(0, 94.03593424182485)]
[(0, 94.03593424182485), (1, 12.121538470486252)]
[(0, 94.03593424182485), (1, 12.1215384

(0.9666666666666667,
 array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2], dtype=int64))