# kNN Simulation using the Plotly library

## Import libraries

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold

import plotly.graph_objs as go
import plotly.express as px

import numpy as np
import pandas as pd


## Generate data

In [8]:
# Synthetic data set: 10,000 samples, 3 features (all informative, none
# redundant), 8 classes with a single cluster per class. Three features are
# used so the points can be drawn directly in a 3D scatter plot.
X, y = make_classification(n_samples=10000,
                           n_features=3,
                           n_informative=3,
                           n_redundant=0,
                           n_classes=8,
                           n_clusters_per_class=1,
                           random_state=4)

# Seed the split too: without random_state the train/test partition is
# reshuffled on every run, so results would not survive Restart & Run All
# even though the data generation above is deterministic.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4)


## Train model

In [9]:
# Hyper-parameter grid: 20 neighbourhood sizes x 2 weightings x 2 metrics
n_neighbors = np.arange(1, 21)
weights = ['uniform', 'distance']
metric = ['euclidean', 'manhattan']
param_grid = dict(n_neighbors=n_neighbors, weights=weights, metric=metric)

# Cross-validation splitter: 10 stratified folds repeated 3 times.
# random_state is fixed so the folds, and therefore the reported best score
# and best parameters, are the same on every run.
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=4)

# Exhaustive grid search scored by accuracy, using all available cores
grid = GridSearchCV(estimator=KNeighborsClassifier(),
                    param_grid=param_grid,
                    cv=cv,
                    scoring='accuracy',
                    n_jobs=-1)
grid_result = grid.fit(X_train, y_train)

# Summarize results
print('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))


Best: 0.793244 using {'metric': 'euclidean', 'n_neighbors': 9, 'weights': 'distance'}


## Evaluate model

In [10]:
# Predict with the fitted grid search on both partitions first ...
y_train_pred = grid_result.predict(X_train)
y_pred = grid_result.predict(X_test)

# ... then score each set of predictions against its true labels.
# NOTE(review): the recorded train accuracy of 1.0 is presumably an artefact
# of the selected weights='distance' (each training point is its own
# zero-distance neighbour) rather than a sign of a perfect model - the test
# accuracy is the meaningful figure.
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_pred)

print('Accuracy on train: ', train_accuracy)
print('Accuracy on test: ', test_accuracy)


Accuracy on train:  1.0
Accuracy on test:  0.792


In [11]:
# Visualise one kNN prediction in 3D: all training points, one test point
# (coloured by its predicted class) and the training points that the fitted
# model selected as that test point's nearest neighbours.

# Shared colour settings. The colour axis is pinned to the label range of the
# training set; without cmin/cmax Plotly rescales every trace to its own
# min/max label, so the same class would be drawn in different colours in the
# train, test-point and neighbour traces.
class_colorscale = px.colors.qualitative.Alphabet_r
class_cmin = int(y_train.min())
class_cmax = int(y_train.max())

# 3d scatter plot on train
fig = go.Figure(data=[go.Scatter3d(x=X_train[:, 0],
                                   y=X_train[:, 1],
                                   z=X_train[:, 2],
                                   mode='markers',
                                   name='train',
                                   marker=dict(color=y_train,
                                               size=2,
                                               opacity=0.8,
                                               cmin=class_cmin,
                                               cmax=class_cmax,
                                               colorscale=class_colorscale)
                                   )])

# initial camera position (a static view; no rotation animation is defined)
x_eye = 1.25
y_eye = 1.25
z_eye = 1.25
fig.update_layout(scene=dict(xaxis=dict(nticks=4, range=[-4, 4],),
                             yaxis=dict(nticks=4, range=[-4, 4],),
                             zaxis=dict(nticks=4, range=[-4, 4],),
                             aspectmode='cube',
                             ),
                  width=800,
                  height=800,
                  margin=dict(r=0, l=0, b=0, t=0),
                  template='plotly_white',
                  scene_camera=dict(eye=dict(x=x_eye, y=y_eye, z=z_eye)))

# take 1 random point from test set
# (seeded so the same point is shown on every run; change the seed to
# inspect a different point)
rng = np.random.default_rng(4)
rd = int(rng.integers(0, X_test.shape[0]))
x_test = X_test[rd, :]
label_test = y_test[rd]

# predict (the estimator expects a 2D array, hence the reshape to one row)
y_pred = grid_result.predict(x_test.reshape(1, -1))

# add test point, coloured by its *predicted* class
fig.add_trace(go.Scatter3d(x=[x_test[0]],
                           y=[x_test[1]],
                           z=[x_test[2]],
                           mode='markers',
                           name='test point (predicted class)',
                           marker=dict(color=y_pred,
                                       size=10,
                                       opacity=1,
                                       cmin=class_cmin,
                                       cmax=class_cmax,
                                       colorscale=class_colorscale)
                           ))

# add test point's neighbors: kneighbors returns row indices into the
# training data the best estimator was fitted on
neighbors = grid_result.best_estimator_.kneighbors(x_test.reshape(1, -1), return_distance=False)[0]
df_neighbors = pd.DataFrame(X_train[neighbors], columns=['x', 'y', 'z'])
df_neighbors['label'] = y_train[neighbors]
display(df_neighbors)

fig.add_trace(go.Scatter3d(x=df_neighbors['x'],
                           y=df_neighbors['y'],
                           z=df_neighbors['z'],
                           mode='markers',
                           name='neighbors',
                           marker=dict(color=df_neighbors['label'],
                                       size=5,
                                       opacity=0.8,
                                       cmin=class_cmin,
                                       cmax=class_cmax,
                                       colorscale=class_colorscale)
                           ))
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

# true label vs. predicted label of the selected test point
print("Y_test: ", label_test)
print("Y_pred: ", y_pred)

Unnamed: 0,x,y,z,label
0,-1.740769,-2.4051,-1.710367,6
1,-1.783207,-2.435184,-2.5363,6
2,-1.271244,-2.477257,-2.384811,0
3,-1.279452,-2.503413,-2.722739,0
4,-1.275856,-2.447749,-1.497768,6
5,-1.104279,-2.743149,-1.679277,0
6,-1.113065,-2.147966,-1.997864,0
7,-1.005447,-2.565028,-2.473371,0
8,-0.948628,-2.458322,-2.31774,0


Y_test:  6
Y_pred:  [0]
