In [1]:
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Load the iris dataset and keep its feature matrix.
iris = datasets.load_iris()
features = iris.data

# Rescale every feature to zero mean / unit variance so that no single
# feature dominates the distance computation.
standardizer = StandardScaler()
features_standardized = standardizer.fit_transform(features)

# Build the neighbor index over the standardized observations (k = 2).
nearest_neighbors = NearestNeighbors(n_neighbors=2)
nearest_neighbors.fit(features_standardized)

# Query point; it is passed to the model as-is (no scaling is applied here).
new_observation = [1, 1, 1, 1]

# kneighbors expects a 2-D batch of observations, hence the wrapping list.
distances, indices = nearest_neighbors.kneighbors([new_observation])

# Display the standardized feature rows of the two closest observations.
features_standardized[indices]

array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
        [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])

In [2]:
# Same 2-neighbor index, this time naming the Euclidean metric explicitly.
nearest_neighbors_euclidean = NearestNeighbors(
    metric='euclidean', n_neighbors=2)
nearest_neighbors_euclidean.fit(features_standardized)

# NOTE: `distances` was produced by the earlier kneighbors query on
# `nearest_neighbors`; it is not recomputed from the model fitted above.
distances

array([[0.49140089, 0.74294782]])

In [4]:
# Dense 0/1 adjacency matrix: entry (r, c) is 1 when observation c is among
# the 2 nearest neighbors of observation r.
neighbor_graph = nearest_neighbors_euclidean.kneighbors_graph(
    features_standardized)
nearest_neighbors_with_self = neighbor_graph.toarray()

# The graph is queried with the same data it was fitted on, so each
# observation can show up as its own neighbor; clear the diagonal to drop
# those self-matches.
for row_idx in range(len(nearest_neighbors_with_self)):
    nearest_neighbors_with_self[row_idx, row_idx] = 0

# Neighbor indicator row for the first observation.
nearest_neighbors_with_self[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [5]:
from sklearn.neighbors import KNeighborsClassifier

# Reload iris, this time keeping the class labels as well.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Standardize the features before computing distances.
standardizer = StandardScaler()
x_std = standardizer.fit_transform(X)

# 5-nearest-neighbors classifier; n_jobs=-1 uses every available core.
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
knn.fit(x_std, y)

# Two observations to classify. They are handed to the model without being
# transformed -- presumably already on the standardized scale (TODO confirm).
new_observations = [
    [0.75, 0.75, 0.75, 0.75],
    [1, 1, 1, 1],
]

# Predicted class label for each new observation.
knn.predict(new_observations)

array([1, 2])

In [7]:
# Class-membership probabilities for the same new observations, taken from
# the KNN classifier fitted earlier (one row per observation, one column per
# class).
knn.predict_proba(new_observations)

array([[0. , 0.6, 0.4],
       [0. , 0. , 1. ]])

In [8]:
# Identifying the best neighborhood size (k) via grid search

from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV

# Reuse the iris data loaded earlier.
features, target = iris.data, iris.target

# Standardized copy of the features; not used by the grid search below, but
# kept because the radius-based classifier cell is fitted on it.
features_standardized = standardizer.fit_transform(features)

knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)

# Scaling lives inside the pipeline, so during cross-validation each split is
# standardized using only its own training portion.
pipe = Pipeline([('standardizer', standardizer),
                 ('knn', knn)])

# Candidate neighborhood sizes: k = 1..10.
search_space = [{'knn__n_neighbors': list(range(1, 11))}]

# Fit on the RAW features: the pipeline already standardizes them, so feeding
# it pre-standardized data would only scale the inputs twice.
classifier = GridSearchCV(
    pipe, search_space, cv=5, verbose=0).fit(features, target)

In [9]:
# Neighborhood size selected by the grid search. `best_params_` holds exactly
# the winning values from the search space, so there is no need to expand the
# full parameter dictionary of the refit pipeline.
classifier.best_params_['knn__n_neighbors']

6

In [10]:
# Radius-based nearest neighbors classifier

from sklearn.neighbors import RadiusNeighborsClassifier

# Classifier that votes among all training observations lying within a fixed
# radius (0.5, in standardized units) of the query point.
# NOTE(review): no outlier_label is set; per the scikit-learn docs a query
# with no neighbors inside the radius makes predict raise -- confirm that is
# the intended behavior.
rnn = RadiusNeighborsClassifier(radius=0.5, n_jobs=-1)
rnn.fit(features_standardized, target)

# Single observation to classify.
new_observations = [[1, 1, 1, 1]]

# Predicted class for the new observation.
rnn.predict(new_observations)

array([2])