In [10]:
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

In [11]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, every training
        sample votes.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k=3):
        # Fail early: a non-positive or non-integer k would otherwise only
        # surface as an opaque error during prediction.
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X_train, y_train):
        """Memorise the training set (KNN has no real training step).

        Parameters
        ----------
        X_train : array-like, one row per sample
            Numeric feature rows; converted to a float ndarray.
        y_train : array-like, one label per sample
            Class labels; converted to an ndarray.

        Raises
        ------
        ValueError
            If the training set is empty or the number of rows in
            ``X_train`` differs from the number of labels.
        """
        # Coerce to ndarrays so lists and pandas objects are handled
        # positionally; float features avoid integer overflow when squaring.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X_train has {len(X_train)} samples but y_train has {len(y_train)} labels"
            )
        if len(X_train) == 0:
            raise ValueError("cannot fit on an empty training set")
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a label for each row of ``X_test``.

        Returns
        -------
        numpy.ndarray
            One predicted label per row, in the same order as ``X_test``.
        """
        X_test = np.asarray(X_test, dtype=float)
        predictions = [self._predict(x) for x in X_test]
        return np.array(predictions)

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        # Distance from x to every training sample in one vectorised step;
        # flattening per sample keeps this valid for any feature shape.
        squared_diff = (self.X_train - x) ** 2
        distances = np.sqrt(
            squared_diff.reshape(self.X_train.shape[0], -1).sum(axis=1)
        )

        # Indices of the k closest samples; a stable sort makes equal
        # distances resolve to the earlier training sample deterministically.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = self.y_train[k_indices]

        # Majority vote over the neighbours' labels.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two feature vectors."""
        x1 = np.asarray(x1, dtype=float)
        x2 = np.asarray(x2, dtype=float)
        return np.sqrt(np.sum((x1 - x2) ** 2))

In [12]:
### Visualizing the data and decision boundaries

def visualize(X, y, model=None, title="KNN Classifier", step=0.01):
    """Scatter-plot the first two feature columns of ``X`` coloured by ``y``.

    When ``model`` is given, its predictions over a regular grid covering the
    data (padded by 1 on every side) are drawn as a translucent filled
    contour, showing the decision regions.

    Parameters
    ----------
    X : array indexable as ``X[:, 0]`` and ``X[:, 1]``
        Feature matrix; only the first two columns are plotted.
    y : array-like
        Per-sample values used to colour the scatter points.
    model : object with a ``predict`` method, optional
        Called as ``model.predict(grid_points)`` with one row per grid point;
        must return one value per row.
    title : str
        Figure title.
    step : float, default 0.01
        Grid spacing for the decision-region mesh. The number of grid points
        (and therefore ``predict`` calls' workload) grows as ``1 / step**2``,
        so use a coarser step for slow models.

    Raises
    ------
    ValueError
        If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")

    plt.figure(figsize=(10, 6))

    # Data points, coloured by their label
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', edgecolors='k', s=50)

    # Explicit None check: do not depend on the model object's truthiness
    if model is not None:
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                             np.arange(y_min, y_max, step))

        # Predict every grid point, then restore the grid shape for contouring
        Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        # 'viridis' (the original 'virdis' is not a registered colormap name)
        plt.contourf(xx, yy, Z, alpha=0.3, cmap='viridis')

    plt.title(title)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.colorbar()
    plt.show()

In [13]:
# Build a synthetic two-class dataset, hold out a test split, and fit the KNN model
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# 100 samples with two informative features; fixed seed for reproducibility
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

# Reserve 30% of the samples for testing (same fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a 3-nearest-neighbour classifier on the training portion
knn = KNN(k=3)
knn.fit(X_train, y_train)



In [16]:
# Predict a class label for every held-out test sample with the fitted model
y_pred = knn.predict(X_test)
# Bare last expression so the notebook displays the predicted labels
y_pred

array([0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 0])

In [15]:
# Accuracy: fraction of test samples whose predicted label equals the true label.
# Guard against a stale y_pred / y_test pair left over from out-of-order cell
# execution: a shape mismatch would make the comparison below meaningless.
assert np.shape(y_pred) == np.shape(y_test), "y_pred and y_test must have the same shape"
accuracy = np.mean(y_pred == y_test)
print(f"Accuracy of the model: {accuracy * 100:.2f}%")

Accuracy of the model: 96.67%
