# KNN Demos
Some demonstrations of how changing the value of k affects k-NN decision boundaries. The slides show additional examples of how the boundaries change for different Gaussian inputs (and for different numbers of them).


In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn import datasets, neighbors
from mlxtend.plotting import plot_decision_regions

# Notebook-wide plot defaults: larger text and a square figure.
plt.rcParams.update({
    'font.size': 14,
    'figure.figsize': (6, 6),
})

In [None]:
# Borrowed from: https://towardsdatascience.com/knn-visualization-in-just-13-lines-of-code-32820d72c6b6
def knn_comparison(data, k):
  """Fit a k-NN classifier on `data` and plot its decision regions.

  Args:
    data: Table indexed as `data[['X','Y']]` for the two features and
      `data['Class']` for the label (cast to int before fitting).
    k: Value passed as `n_neighbors` to `KNeighborsClassifier`.

  The figure is displayed with `plt.show()`; nothing is returned.
  """
  features = data[['X','Y']].values
  targets = data['Class'].astype(int).values

  # Train the classifier on every row of the table.
  classifier = neighbors.KNeighborsClassifier(n_neighbors=k)
  classifier.fit(features, targets)

  # Draw the regions the fitted classifier assigns to each class.
  plot_decision_regions(features, targets, clf=classifier, legend=2)

  # Label the axes and title the figure with the k that was used.
  plt.xlabel('X')
  plt.ylabel('Y')
  plt.title(f'Knn with K={k}')
  plt.show()

In [None]:
# Define our data structure, consisting of labeled Gaussian distributions.
from dataclasses import dataclass
from typing import Tuple

@dataclass
class Gaussian:
  """A labeled two-dimensional Gaussian described by a mean and a standard deviation pair."""
  # Center of the distribution, one value per dimension.
  mean: Tuple[float, float]
  # Standard deviation, one value per dimension.
  stdev: Tuple[float, float]
  # Integer class label attached to points drawn from this distribution.
  label: int

In [None]:
# Create a couple of labeled Gaussian distributions and sample data from them.
gaussian_values = (
    Gaussian(mean=(0, 3), stdev=(3, 3), label=0),
    Gaussian(mean=(0, -2), stdev=(3, 1), label=1),
    Gaussian(mean=(2, -5), stdev=(1, 1), label=0),
)

data = []
samples_per_gaussian = 50  # What happens if we make this way larger?
for gaussian in gaussian_values:
  # Draw (x, y) points; loc and scale are applied per column.
  samples = np.random.normal(
      loc=gaussian.mean,
      scale=gaussian.stdev,
      size=(samples_per_gaussian, 2),
  )
  # One label per sampled point, shaped as a column to sit beside the samples.
  labels = np.full((samples_per_gaussian, 1), gaussian.label)
  # Each appended row is [x, y, label].
  data.extend(np.concatenate((samples, labels), axis=1))

In [None]:
# Put the sampled rows in a DataFrame with the column names knn_comparison
# reads, then plot the decision boundaries for several values of k.
df = pd.DataFrame(data=data, columns=['X', 'Y', 'Class'])
for i in (1, 5, 20, 30):
  knn_comparison(data=df, k=i)