# KNN

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import datasets
import matplotlib.pyplot as plot
%matplotlib inline
# Use seaborn's 'whitegrid' style for the plots drawn in this notebook.
sns.set_style('whitegrid')

In [None]:
# Load the iris dataset through seaborn's dataset loader.
df = sns.load_dataset('iris')

In [None]:
# Features: the two petal measurements (two columns so the decision regions
# can be drawn in a plane).
X_train = df[['petal_length', 'petal_width']]

# Integer code for each species name.
species_to_num = {'setosa': 0, 'versicolor': 1, 'virginica': 2}

# Encode only while the column still holds species names. Series.map turns
# every value that is missing from the dict into NaN, so re-running this cell
# on the already-encoded column would otherwise wipe out all labels.
if df['species'].isin(list(species_to_num)).any():
    df['species'] = df['species'].map(species_to_num)

# Target vector: the encoded species column.
y_train = df['species']

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours classifier built with the library's default settings
# (no hyper-parameters are overridden here).
knn = KNeighborsClassifier()

In [None]:
# Fit on the two petal features and the integer-encoded species labels.
knn.fit(X_train, y_train)

In [None]:
# Plot the classifier's decision regions over the two training features.
Xv = X_train.values          # column 0 -> horizontal axis, column 1 -> vertical axis
h = 0.02                     # mesh step size
pad = 0.5                    # margin drawn around the observed data

# Axis limits come from each feature separately. The bounds must not be taken
# from the pooled feature values or from the class labels, which describe
# neither axis.
x_min, x_max = Xv[:, 0].min() - pad, Xv[:, 0].max() + pad
y_min, y_max = Xv[:, 1].min() - pad, Xv[:, 1].max() + pad
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Classify every mesh point. The points are wrapped in a DataFrame carrying
# the training column labels so the input matches what the model was fitted on.
grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X_train.columns)
z = knn.predict(grid).reshape(xx.shape)

fig, ax = plot.subplots(figsize=(8, 5))
ax.contourf(xx, yy, z, cmap='afmhot', alpha=0.3)
ax.scatter(Xv[:, 0], Xv[:, 1], c=y_train, s=40, alpha=0.9, edgecolors='k')
ax.set_xlabel(X_train.columns[0])
ax.set_ylabel(X_train.columns[1])
ax.set_title('KNN decision regions')
plot.show()

# Project: Cancer Detection

In [None]:
# Column labels for the data file, in file order: a record id, nine
# feature columns, and the target column.
col = [
    'id',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epitheliel Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]

In [None]:
# Read the data file without treating any row as a header; the column
# labels come from `col`.
df = pd.read_csv(
    '../cancer/breast-cancer-wisconsin.data',
    header=None,
    names=col,
)

In [None]:
# Preview the first rows to check that the supplied column labels line up.
df.head()

In [None]:
# Positions (row indices, column indices) of NaN cells.
# NOTE: placeholders stored as text, such as the '?' entries in 'Bare Nuclei'
# handled further down, are not NaN and therefore do not show up here.
np.where(df.isnull())

In [None]:
# Summary statistics for the 'Bare Nuclei' column.
df['Bare Nuclei'].describe()

In [None]:
# Frequency of each distinct 'Bare Nuclei' value; non-numeric placeholders
# such as '?' become visible in this listing.
df['Bare Nuclei'].value_counts()

In [None]:
# 'Bare Nuclei' uses '?' for missing entries. Turn those into NaN and convert
# the column to a numeric dtype. The result is assigned back to the column
# explicitly: calling replace(..., inplace=True) on a column selection is a
# chained in-place update that pandas deprecates, and it does not modify `df`
# under copy-on-write.
df['Bare Nuclei'] = pd.to_numeric(df['Bare Nuclei'].replace('?', np.nan))

# Drop every row that contains a missing value.
df = df.dropna()

In [None]:
# Recode the target from the file's 2/4 coding to 0/1.
# NOTE(review): presumably 2 = benign and 4 = malignant - confirm against the
# dataset description.
class_to_num = {2: 0, 4: 1}

# Recode only while original codes are still present. Series.map turns values
# missing from the dict into NaN, so re-running this cell on the recoded
# column would otherwise replace every label with NaN.
if df['Class'].isin(list(class_to_num)).any():
    df['Class'] = df['Class'].map(class_to_num)

In [None]:
# List the column labels before choosing the feature columns.
df.columns

In [None]:
# Feature matrix: every column except the record id and the target.
X = df.drop(columns=['id', 'Class'])

# Keep the feature labels for later use.
X_col = X.columns

In [None]:
# Target vector: the 'Class' column.
y = df['Class']

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardise the feature columns. From here on X holds the scaler's output
# for the raw values rather than the original DataFrame.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test-set statistics influence the scaling. Consider
# splitting first and fitting the scaler on the training rows only.
X = StandardScaler().fit_transform(X.values)

In [None]:
from sklearn.model_selection import train_test_split
# Wrap the scaled values in a DataFrame, reattaching the feature labels saved
# in X_col, and preview the result.
df1 = pd.DataFrame(X, columns=X_col)
df1.head()

In [None]:
# 80% of the rows go to training, the rest to testing; random_state fixes
# the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(df1, y, train_size=0.8, random_state=42)

In [None]:
# KNN with k=5 using the Minkowski metric with p=2 (i.e. Euclidean distance).
knn =  KNeighborsClassifier(n_neighbors=5, p=2, metric='minkowski')

In [None]:
# Fit on the training split only; the test split is kept for evaluation.
knn.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
%run mpdroid.ipynb

In [None]:
# Report the model's performance on the training split.
# NOTE(review): print_training_score is not defined in this notebook;
# presumably it comes from the `%run mpdroid.ipynb` cell - confirm.
print_training_score(knn, X_train, y_train)

In [None]:
# Report the model's performance on the held-out test split.
# NOTE(review): print_test_score is not defined in this notebook;
# presumably it comes from the `%run mpdroid.ipynb` cell - confirm.
print_test_score(knn, X_test, y_test)

## Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Show the estimator's current hyper-parameters, i.e. what could be tuned.
knn.get_params()

In [None]:
# Search grid: try every neighbour count from 1 to 10 inclusive.
params = {'n_neighbors': list(range(1, 11))}

In [None]:
# Cross-validated grid search over `params` using all CPU cores (n_jobs=-1).
# return_train_score=True is required because a later cell reads
# cv_results_['mean_train_score']; scikit-learn leaves training scores out of
# cv_results_ by default, which would make that lookup raise KeyError.
grid_search_cv = GridSearchCV(
    KNeighborsClassifier(),
    params,
    n_jobs=-1,
    verbose=1,
    return_train_score=True,
)

In [None]:
# Run the search on the training split only.
grid_search_cv.fit(X_train, y_train)

In [None]:
# Same training-split report as for the plain KNN model, now for the fitted
# search object.
# NOTE(review): helper presumably comes from `%run mpdroid.ipynb` - confirm.
print_training_score(grid_search_cv, X_train, y_train)

In [None]:
# Same test-split report as for the plain KNN model, now for the fitted
# search object.
# NOTE(review): helper presumably comes from `%run mpdroid.ipynb` - confirm.
print_test_score(grid_search_cv, X_test, y_test)

In [None]:
# Hyper-parameter setting selected by the search.
grid_search_cv.best_params_

In [None]:
# Mean training score for each candidate in the grid.
# NOTE: scikit-learn only adds this key to cv_results_ when the search was
# created with return_train_score=True; otherwise this lookup raises KeyError.
grid_search_cv.cv_results_['mean_train_score']

In [None]:
# Full cross-validation results recorded by the search.
grid_search_cv.cv_results_