# k-nearest neighbors (KNN)

## Importation des bibliothèques

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from collections import Counter

## Importation de la dataset

In [None]:
# Load the dataset from 'dataset_etudiants.csv' (path relative to the working directory).
dataset = pd.read_csv('dataset_etudiants.csv')
# Features: every column except the last one, as a NumPy array.
X = dataset.iloc[:, :-1].values
# Labels: the last column, as a NumPy array.
y = dataset.iloc[:, -1].values

## Le modèle KNN (la classe)

In [None]:
class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    ``fit`` only memorises the training data; the actual work happens in
    ``predict``, where each query is compared against every stored sample.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` closest samples.

        Args:
            k: Number of neighbours consulted for each prediction.

        Raises:
            ValueError: If ``k`` is not an integer greater than or equal to 1.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Args:
            X: Array-like of training samples, one sample per first-axis entry.
            y: Array-like of labels, one per sample.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` disagree on the
                number of samples.
        """
        # Converting to float avoids wrap-around when subtracting unsigned or
        # small-integer features, and lets lists / pandas objects be passed in.
        features = np.asarray(X, dtype=float)
        labels = np.asarray(y)
        if features.ndim == 0 or features.size == 0:
            raise ValueError("X must contain at least one training sample")
        if labels.shape[:1] != features.shape[:1]:
            raise ValueError(
                "X and y must have the same number of samples, got "
                f"{features.shape[0]} and {labels.shape[0] if labels.ndim else 'a scalar'}"
            )
        # One flat row per sample so the distances can be computed in one shot.
        self.X_train = features.reshape(len(features), -1)
        self.y_train = labels

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Args:
            X: Array-like of query samples, iterated along its first axis.

        Returns:
            A NumPy array holding one predicted label per query sample.

        Raises:
            AttributeError: If ``fit`` has not been called yet.
        """
        if getattr(self, "X_train", None) is None:
            raise AttributeError(
                "this KNN instance is not fitted yet; call fit() before predict()"
            )
        queries = np.asarray(X, dtype=float)
        return np.array([self._predict(query) for query in queries])

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``.

        If fewer than ``k`` training samples are stored, all of them vote.
        When several labels are tied, the label of the nearest neighbour among
        the tied ones wins.
        """
        # Distances from x to every training sample, computed in one vectorised pass.
        deltas = self.X_train - np.asarray(x, dtype=float).ravel()
        distances = np.sqrt(np.sum(deltas ** 2, axis=1))
        # A stable sort keeps equidistant samples in their training order,
        # which makes the neighbour selection deterministic.
        nearest = np.argsort(distances, kind="stable")[: self.k]
        # Labels are counted from nearest to farthest; Counter lists equal
        # counts in first-seen order, hence the tie-breaking rule above.
        votes = Counter(self.y_train[nearest])
        return votes.most_common(1)[0][0]


def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x1: Array-like coordinates of the first point.
        x2: Array-like coordinates of the second point, broadcastable to ``x1``.

    Returns:
        The distance as a NumPy float.
    """
    # Work in floating point: subtracting unsigned or small-integer arrays
    # directly would wrap around and yield a wrong distance.
    delta = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(delta ** 2))

## Fractionnement de la dataset en training set et test set

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

In [None]:
# Display the training features produced by the split.
print(X_train)

In [None]:
# Display the held-out test features.
print(X_test)

In [None]:
# Display the labels that go with the training features.
print(y_train)

In [None]:
# Display the true labels of the held-out test samples.
print(y_test)

## L'entraînement du modèle KNN sur la dataset

In [None]:
# Build a classifier that votes among the 3 nearest neighbours, then hand it
# the training split.
knn = KNN(k=3)
knn.fit(X_train, y_train)

## La prédiction de X_test

In [None]:
# Predict a label for every held-out sample, then display the predictions.
predictions = knn.predict(X_test)
print(predictions)

## La précision de la prédiction

In [None]:
def accuracy(y_true, y_pred):
    """Return the fraction of predictions that match the true labels.

    Args:
        y_true: Array-like of ground-truth labels.
        y_pred: Array-like of predicted labels, same shape as ``y_true``.

    Returns:
        A NumPy float between 0 and 1.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    # Convert first: comparing two plain lists with == yields a single bool,
    # not an element-wise result.
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {actual.shape} and {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    return np.mean(actual == predicted)

# Report the share of test samples whose predicted label matches the true one.
print("Précision de la classification KNN (entre 0 et 1) est: ", accuracy(y_test, predictions))

Précision de la classification KNN (entre 0 et 1) est:  1.0
