# K-Nearest Neighbors (KNN) Classifier

## Import Libraries

In [21]:
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier

## Open the Dataset

In [22]:
# Read the glass data set from a CSV file given by a relative path.
DATA_PATH = 'glass.csv'
df = pd.read_csv(DATA_PATH)

# Bare last expression so the notebook renders the frame as a table.
df

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.51793,12.79,3.50,1.12,73.03,0.64,8.77,0.0,0.00,'build wind float'
1,1.51643,12.16,3.52,1.35,72.89,0.57,8.53,0.0,0.00,'vehic wind float'
2,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.0,0.00,'build wind float'
3,1.51299,14.40,1.74,1.54,74.55,0.00,7.59,0.0,0.00,tableware
4,1.53393,12.30,0.00,1.00,70.16,0.12,16.19,0.0,0.24,'build wind non-float'
...,...,...,...,...,...,...,...,...,...,...
209,1.51610,13.42,3.40,1.22,72.69,0.59,8.32,0.0,0.00,'vehic wind float'
210,1.51592,12.86,3.52,2.12,72.66,0.69,7.97,0.0,0.00,'build wind non-float'
211,1.51613,13.92,3.52,1.25,72.88,0.37,7.94,0.0,0.14,'build wind non-float'
212,1.51689,12.67,2.88,1.71,73.21,0.73,8.54,0.0,0.00,'build wind non-float'


## Data Preprocessing

In [23]:
# Split the frame positionally: every column except the last is a feature,
# and the last column is the class label.
# `df.values` on a frame that mixes numeric and string columns yields an
# object-dtype array, so the features are converted to a float array explicitly.
X = df.iloc[:, :-1].to_numpy(dtype=float)
y = df.iloc[:, -1].to_numpy()
# NOTE(review): some labels carry literal single quotes (e.g. "'build wind float'");
# they are kept unchanged here -- confirm whether they should be stripped.
print(X)
print(y)

[[1.51793 12.79 3.5 ... 8.77 0.0 0.0]
 [1.51643 12.16 3.52 ... 8.53 0.0 0.0]
 [1.51793 13.21 3.48 ... 8.43 0.0 0.0]
 ...
 [1.51613 13.92 3.52 ... 7.94 0.0 0.14]
 [1.51689 12.67 2.88 ... 8.54 0.0 0.0]
 [1.51852 14.09 2.19 ... 9.32 0.0 0.0]]
["'build wind float'" "'vehic wind float'" "'build wind float'"
 'tableware' "'build wind non-float'" "'build wind non-float'"
 "'vehic wind float'" "'build wind float'" 'headlamps'
 "'build wind non-float'" "'build wind non-float'"
 "'build wind non-float'" "'build wind float'" "'vehic wind float'"
 "'vehic wind float'" "'build wind non-float'" 'headlamps'
 "'build wind non-float'" 'containers' "'build wind non-float'"
 "'build wind float'" "'build wind non-float'" "'build wind non-float'"
 "'build wind float'" 'containers' "'build wind non-float'"
 "'build wind non-float'" 'headlamps' "'build wind non-float'"
 "'vehic wind float'" "'build wind non-float'" "'vehic wind float'"
 'tableware' "'build wind non-float'" "'build wind float'"
 "'build wind 

## Changing Model Parameters

In [24]:
# Baseline KNN configuration: 10 neighbors, uniform weights, Euclidean metric.
# NOTE(review): the features are passed in unscaled; Euclidean distance is
# scale-sensitive, so consider standardizing them first -- confirm.
knn_params = {
    'n_neighbors': 10,
    'weights': 'uniform',
    'metric': 'euclidean',
}
clf = KNeighborsClassifier(**knn_params)

## K-fold Cross-validation

In [25]:
# 10-fold splitter; shuffling with a fixed random_state keeps the folds reproducible.
cv = KFold(n_splits=10, shuffle=True, random_state=0)

# One score per fold for the current `clf`.
cv_results = cross_val_score(estimator=clf, X=X, y=y, cv=cv)

# Report the mean score over all folds.
print(cv_results.mean())

0.6199134199134199


## Prediction with KNN

In [26]:
# Train on the full data set, then classify one hand-written sample.
clf.fit(X, y)  # training

# A single observation with nine feature values, passed as a list holding one row.
new_sample = [[1.5, 13, 1.5, 1.5, 70, 0.5, 8.9, 0.1, 0.2]]
pred_y = clf.predict(new_sample)  # prediction for the new sample
print(pred_y)

["'build wind non-float'"]


## Comparison with Varying k

In [27]:
# Compare cross-validated scores for several numbers of neighbors.
# The `cv` splitter defined earlier is reused for every model.
# NOTE: `clf` is rebound here, replacing the classifier created and fitted in earlier cells.
neighbor_counts = (20, 5, 1)
clf, clf2, clf3 = (
    KNeighborsClassifier(n_neighbors=k, weights='uniform')
    for k in neighbor_counts
)

# Score the models in the same order they were created.
results, results2, results3 = (
    cross_val_score(model, X, y, cv=cv)
    for model in (clf, clf2, clf3)
)

# Print the mean score of each configuration.
report = (
    ("20 neighbors: {}", results),
    ("5 neighbors: {}", results2),
    ("1 neighbors: {}", results3),
)
for label, scores in report:
    print(label.format(scores.mean()))

# Increasing k is not always a good idea.

20 neighbors: 0.6155844155844156
5 neighbors: 0.648051948051948
1 neighbors: 0.7370129870129871
