In [None]:
# KNN Classification
from pandas import read_csv
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Load the Pima Indians diabetes CSV; column names are supplied explicitly via `names`.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
filename = 'pima-indians-diabetes.data.csv'
dataframe = read_csv(filename, names=names)
# Work on the raw value matrix: the first eight columns are the inputs,
# the ninth ('class') is the label to predict.
array = dataframe.values
X, Y = array[:, :8], array[:, 8]


In [None]:

# 10-fold splitter; rows are shuffled with a fixed seed so the folds are reproducible.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)


In [None]:
# Cross-validate an 18-neighbour KNN classifier on the folds produced by `kfold`;
# `results` holds one score per fold.
model = KNeighborsClassifier(n_neighbors=18)
results = cross_val_score(estimator=model, X=X, y=Y, cv=kfold)

In [None]:
# Report the score averaged over all folds.
print(np.mean(results))

### Grid Search for Algorithm Tuning

In [None]:
# Grid Search for Algorithm Tuning
import numpy
from pandas import read_csv
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# Read the dataset for the grid-search section and separate inputs from labels.
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
# `names` fixes the frame at nine columns, so "all but the last" is columns 0-7
# and "the last" is column 8 ('class').
X = array[:, :-1]
Y = array[:, -1]

In [None]:
# Candidate neighbour counts 1..40 (the upper bound 41 is exclusive),
# wrapped in the {parameter name: candidates} mapping that is passed to the grid search.
n_neighbors = numpy.arange(1, 41)
param_grid = {'n_neighbors': n_neighbors}

In [None]:
# Tune n_neighbors with an exhaustive grid search. The search is scored on the
# same 10-fold, seed-7 shuffled split used by the other cross-validation cells,
# so best_score_ is directly comparable with their results (leaving `cv` unset
# would fall back to GridSearchCV's default splitter instead).
model = KNeighborsClassifier()
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=KFold(n_splits=10, shuffle=True, random_state=7),
)
grid.fit(X, Y)

In [None]:
# Show the best mean CV score found by the search, then the parameters that achieved it.
print(grid.best_score_, grid.best_params_, sep='\n')

### Visualizing the CV results

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Evaluate every k from 1 to 40 (range() excludes its upper bound, 41).
k_range = range(1, 41)
# The folds are fully determined by the fixed seed, so a single splitter is
# built once and reused for every k instead of being recreated per iteration.
kfold = KFold(n_splits=10, random_state=7, shuffle=True)
k_scores = []
# For each k, cross-validate a KNN classifier and record the mean score over the folds.
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X, Y, cv=kfold)
    k_scores.append(scores.mean())
# Plot the mean cross-validated score against k.
plt.plot(k_range, k_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')
# Limit the x-axis to the k values actually evaluated (1..40).
plt.xlim([k_range[0], k_range[-1]])
plt.show()

In [None]:
# Display the mean cross-validated score recorded for each k in k_range.
k_scores

In [None]:
# Find the highest mean CV score and where it first occurs in k_scores.
# best_k_val is a zero-based position; k_range starts at 1, so the
# corresponding k is best_k_val + 1.
best_score = max(k_scores)
best_k_val = k_scores.index(best_score)
print(best_k_val + 1, best_score)

In [None]:
# Re-run the 10-fold cross-validation for a 16-neighbour KNN classifier
# and show the mean score over the folds.
# NOTE(review): 16 is hard-coded; presumably the best k found above — confirm.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
knn = KNeighborsClassifier(n_neighbors=16)
scores = cross_val_score(estimator=knn, X=X, y=Y, cv=kfold)
scores.mean()