# 샘플의 최근접 이웃 찾기

In [3]:
from sklearn import datasets
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler

# Load the iris feature matrix and the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Standardize the features before measuring distances between samples.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Index the standardized samples for 2-nearest-neighbour lookups.
model = NearestNeighbors(n_neighbors=2)
model.fit(X_scaled)

# Query point, given directly in the standardized feature space.
observed = [[1, 1, 1, 1]]
distances, indices = model.kneighbors(observed)

# Display the standardized feature rows of the neighbours that were found.
X_scaled[indices]

array([[[1.03800476, 0.55861082, 1.10378283, 1.18556721],
        [0.79566902, 0.32841405, 0.76275827, 1.05393502]]])

In [4]:
# Distances from the query point to each of the returned neighbours.
distances

array([[0.49140089, 0.74294782]])

In [5]:
# Rebuild the index with three neighbours per query and an explicit metric.
model = NearestNeighbors(n_neighbors=3, metric='euclidean')
model.fit(X_scaled)

# Dense neighbour-connectivity matrix. The samples are queried against the
# same data the model was fitted on, so presumably every row also marks the
# sample itself as one of its neighbours (hence the variable name).
neighbor_graph = model.kneighbors_graph(X_scaled)
nearest_neighbors_with_self = neighbor_graph.toarray()

In [6]:
# Clear the diagonal in place so that no sample is listed as its own neighbour.
sample_count = len(nearest_neighbors_with_self)
diagonal = range(sample_count)
nearest_neighbors_with_self[diagonal, diagonal] = 0

# Display the neighbour indicator row of the first sample.
nearest_neighbors_with_self[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

# k-최근접 이웃 분류기 만들기

In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

# Load the iris feature matrix and the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Standardize the features before computing neighbour distances.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# 5-nearest-neighbour classifier with distance-weighted votes, using all cores.
model = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, weights='distance')
model.fit(X_scaled, y)

# Create two samples (given in the standardized feature space).
new_observations = [
    [0.75, 0.75, 0.75, 0.75],
    [1, 1, 1, 1],
]

# Predicted class for each of the two samples.
model.predict(new_observations)

array([1, 2])

In [10]:
# Per-class probability estimates for each new observation (one row per sample).
model.predict_proba(new_observations)

array([[0.       , 0.6123219, 0.3876781],
       [0.       , 0.       , 1.       ]])

In [25]:
from sklearn.neighbors import KNeighborsRegressor
import pandas as pd

# Read the housing table from the local datasets folder.
df = pd.read_csv('./datasets/BostonHousing.csv')

# Features: the first two columns. Target: the last column.
X = df.iloc[:, :2]
y = df.iloc[:, -1]

In [26]:
# Regress the target on the two features using the 10 nearest neighbours.
model = KNeighborsRegressor(n_neighbors=10).fit(X, y)

# Prediction for the first row, scaled by 1000
# (presumably the target is recorded in thousands -- TODO confirm).
first_row = X.iloc[:1]
model.predict(first_row)[0] * 1000

32440.000000000004

In [27]:
import numpy as np

# Row positions of the neighbours the regressor uses for the first sample;
# return_distance=False asks for the indices only.
idxs = model.kneighbors(X.iloc[:1], return_distance=False)
idxs

array([[  0,  64, 280, 281, 282, 269, 272, 271, 279, 273]])

In [30]:
# Average the targets of those neighbours by hand; this reproduces the
# regressor's prediction for the first sample.
neighbor_targets = y.iloc[idxs[0]]
neighbor_targets.mean() * 1000

32440.000000000004

# 최선의 이웃 개수 결정하기

In [32]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import GridSearchCV

# Load the iris feature matrix and the class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Unfitted pipeline steps: GridSearchCV clones the estimator and fits the
# clones itself, so fitting the scaler or the classifier beforehand is
# wasted work.
scaler = StandardScaler()
model = KNeighborsClassifier(n_neighbors=5, n_jobs=-1, weights='distance')
pipe = Pipeline([
    ('scaler', scaler),
    ('knn', model),
])

# Candidate neighbour counts (1..10) for the 'knn' step of the pipeline.
params = [{"knn__n_neighbors": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}]

# Fit on the RAW features. The pipeline already standardizes inside every
# cross-validation fold, so passing pre-standardized data would scale twice
# and leave a fitted pipeline that silently expects already-scaled inputs.
# Fold-wise standardization is unaffected by a prior per-feature rescaling,
# so the cross-validation scores should match the pre-scaled run up to
# floating-point rounding.
grid = GridSearchCV(pipe, params, cv=5, verbose=0).fit(X, y)



In [33]:
# Parameter setting selected by the grid search.
grid.best_params_

{'knn__n_neighbors': 6}

In [34]:
# Read the chosen neighbour count back from the best pipeline's parameters.
grid.best_estimator_.get_params()['knn__n_neighbors']

6

# 반지름 기반의 최근접 이웃 분류기 만들기

In [36]:
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

# Load the iris feature matrix and the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Standardize the features before computing neighbour distances.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Classify from all training samples within radius 0.5 of the query,
# rather than from a fixed number of neighbours.
model = RadiusNeighborsClassifier(radius=.5, n_jobs=-1)
model.fit(X_scaled, y)

# One query point, given in the standardized feature space.
new_observations = [[1, 1, 1, 1]]

model.predict(new_observations)

array([2])

In [38]:
# A query with very large feature values, used to exercise the outlier path.
new_observations = [[100, 100, 100, 100]]

# outlier_label=-1 is the label assigned to a query that has no training
# sample inside the radius.
model = RadiusNeighborsClassifier(radius=.5, n_jobs=-1, outlier_label=-1)
model.fit(X_scaled, y)

model.predict(new_observations)



array([-1])