In [None]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

# Toy dataset: five 2-D points; the first two are labelled 1, the last three 0.
X = np.array([[1, 2],
              [2, 3],
              [3, 4],
              [4, 5],
              [5, 6]])
y = np.array([1, 1, 0, 0, 0])

# NOTE(review): n_neighbors equals the number of training samples (5), so each
# query's neighbourhood is the whole training set. Presumably (with sklearn's
# default uniform vote) every point is then assigned the majority label 0,
# which is consistent with the 0.6 accuracy recorded in the cell output.
knn_clf = KNeighborsClassifier(n_neighbors=5, metric='euclidean')

knn_clf.fit(X, y)

# Predict on the same samples the model was fitted on (no held-out data here).
y_pred = knn_clf.predict(X)

def accuracy(y_test, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_test: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, aligned with ``y_test``.

    Returns:
        The mean of the element-wise equality between the two inputs.
    """
    # Coerce both sides to arrays first: ``==`` between two plain lists yields
    # a single bool instead of an element-wise mask, which would collapse the
    # score to 0.0 or 1.0.
    return np.mean(np.asarray(y_test) == np.asarray(y_pred))

# Compare the predictions with the labels of the same samples they were made
# for; y_pred was produced from X, the data the classifier was fitted on.
acc = accuracy(y, y_pred)
print(acc)

0.6


# KNN Pseudocode

## Class: `KNN`

### Attributes:
- `k`: Number of nearest neighbors to consider.
- `X_train`: Training data features.
- `y_train`: Training data labels.

---

## Methods:

### `__init__(k=3)`
**Purpose:** Initialize the KNN model with the specified number of neighbors.

1. Set `self.k` to `k`.

---

### `fit(X, y)`
**Purpose:** Store the training data features and labels.

1. Set `self.X_train` to `X`.
2. Set `self.y_train` to `y`.

---

### `predict(X)`
**Purpose:** Predict labels for the input data `X`.

1. For each sample `x` in `X`:
   - Call `_predict(x)` to predict the label.
2. Return the list of predictions.

---

### `_predict(x)`
**Purpose:** Predict the label for a single sample `x`.

1. Compute the distances between `x` and each training sample:
   - For each training sample `x_train` in `self.X_train`:
     - Compute the Euclidean distance between `x` and `x_train`.
2. Identify the indices of the `k` closest samples:
   - Sort the training-sample indices by ascending distance (e.g. with `np.argsort`) and keep the first `k` of them.
3. Retrieve the labels of the `k` nearest neighbors:
   - Use the indices to get labels from `self.y_train`.
4. Determine the most common label among the `k` neighbors:
   - Call `_most_common_label(k_nearest_labels)` to find the majority label.
5. Return the predicted label.

---

### `_most_common_label(y)`
**Purpose:** Identify the most common label in a list of labels `y`.

1. Count the occurrences of each unique label:
   - Use `np.unique(y, return_counts=True)` to get unique labels and their counts.
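2. Return the label with the highest count (`unique_labels[np.argmax(counts)]`). On a tie, `np.argmax` returns the first maximum and `np.unique` returns the labels in sorted order, so the smallest tied label wins.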


In [None]:
import numpy as np

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: Array-like of coordinates for the first point.
        x2: Array-like of coordinates for the second point; must be
            broadcast-compatible with ``x1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # Cast to float before subtracting: plain lists do not support ``-``, and
    # small or unsigned integer dtypes (e.g. uint8 pixel data) would wrap
    # around in both the difference and the square.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Attributes:
        k: Number of nearest neighbours consulted for each prediction.
        X_train: Training features as a float array (``None`` until ``fit``).
        y_train: Training labels as an array (``None`` until ``fit``).
    """

    def __init__(self, k=3):
        """Create an unfitted classifier.

        Args:
            k: Number of neighbours to vote; must be at least 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and their labels.

        Args:
            X: Array-like of training samples, one sample per row.
            y: Array-like of labels, one per training sample.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Float storage avoids integer wrap-around when differences are taken
        # later, and lets plain nested lists be used as training data.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if len(X) == 0:
            raise ValueError("cannot fit KNN on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Args:
            X: Iterable of samples, each shaped like one training sample.

        Returns:
            A list with one predicted label per sample, in input order.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.X_train is None:
            raise RuntimeError("KNN.predict() called before fit()")
        return [self._predict(x) for x in X]

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        x = np.asarray(x, dtype=float)

        # Distance to every training sample in one vectorised pass; flattening
        # the trailing axes sums over all coordinates of each sample.
        squared = (self.X_train - x) ** 2
        distances = np.sqrt(squared.reshape(len(self.X_train), -1).sum(axis=1))

        # A stable sort keeps equally distant samples in training order, so
        # tie-breaking is deterministic. Slicing clamps k to the sample count.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]

        return self._most_common_label(k_nearest_labels)

    def _most_common_label(self, y):
        """Return the most frequent label in ``y``.

        ``np.unique`` returns labels in sorted order and ``np.argmax`` picks
        the first maximum, so a tie resolves to the smallest tied label.
        """
        unique_labels, counts = np.unique(y, return_counts=True)
        return unique_labels[np.argmax(counts)]

# Toy dataset: five 2-D points; the first two are labelled 1, the last three 0.
X = np.array([[1, 2],
              [2, 3],
              [3, 4],
              [4, 5],
              [5, 6]])
y = np.array([1, 1, 0, 0, 0])


# k equals the number of training samples (5), so every query votes over the
# entire training set and the majority label 0 (3 of 5) wins each time.
clf = KNN(k=5)
clf.fit(X, y)
# Predict on the same samples the model was fitted on; predict returns a list.
predictions = clf.predict(X)

print(predictions)

def accuracy(y_test, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_test: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, aligned with ``y_test``.

    Returns:
        The mean of the element-wise equality between the two inputs.
    """
    # Coerce both sides to arrays first: ``==`` between two plain lists yields
    # a single bool instead of an element-wise mask, which would collapse the
    # score to 0.0 or 1.0.
    return np.mean(np.asarray(y_test) == np.asarray(y_pred))

# Compare the predictions with the labels of the same samples they were made
# for; predictions was produced from X, the data the classifier was fitted on.
acc = accuracy(y, predictions)
print(acc)

[0, 0, 0, 0, 0]
0.6
