In [99]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Classification

In [102]:
from sklearn.datasets import make_classification

In [104]:
# Synthetic binary-classification data: 1000 samples with 3 features, one of
# which is redundant; the seed is fixed so the dataset is reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_redundant=1,
    n_classes=2,
    random_state=42,
)

In [106]:
from sklearn.model_selection import train_test_split

In [108]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [110]:
from sklearn.neighbors import KNeighborsClassifier

In [112]:
# k-nearest-neighbours classifier with k = 5.
# `algorithm` only selects the neighbour-search structure ('auto', 'ball_tree',
# 'kd_tree' or 'brute'); it does NOT choose the distance.
# The distance is controlled by `p`, the Minkowski power parameter:
#   p=1 -> Manhattan distance, p=2 -> Euclidean distance.
# p=2 is the library default; it is spelled out here so the metric is explicit.
classifier = KNeighborsClassifier(n_neighbors=5, algorithm='auto', p=2)
classifier.fit(X_train, y_train)

In [114]:
# Predict class labels for the held-out test features with the fitted KNN model.
y_pred = classifier.predict(X_test)

In [116]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [118]:
# Report, one item per line: confusion matrix, accuracy, per-class report.
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

[[87  9]
 [12 92]]
0.895
              precision    recall  f1-score   support

           0       0.88      0.91      0.89        96
           1       0.91      0.88      0.90       104

    accuracy                           0.90       200
   macro avg       0.89      0.90      0.89       200
weighted avg       0.90      0.90      0.90       200



In [120]:
from sklearn.model_selection import GridSearchCV

In [122]:
# GridSearchCV: 5-fold cross-validated search over k = 1..10 for the KNN classifier.
param_grid = {'n_neighbors': list(range(1, 11))}

grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
)
grid.fit(X_train, y_train)

In [123]:
# Predict on the test set with the fitted grid search; this overwrites the
# `y_pred` produced earlier by the fixed n_neighbors=5 classifier.
y_pred = grid.predict(X_test)

In [124]:
# Display the n_neighbors value selected by the grid search.
grid.best_params_

{'n_neighbors': 9}

In [125]:
# Display the best cross-validation score found on the training data
# (not a test-set score).
grid.best_score_

0.91375

In [126]:
# Test-set report for the grid-search predictions, one item per line:
# confusion matrix, accuracy, per-class report.
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep='\n',
)

[[88  8]
 [13 91]]
0.895
              precision    recall  f1-score   support

           0       0.87      0.92      0.89        96
           1       0.92      0.88      0.90       104

    accuracy                           0.90       200
   macro avg       0.90      0.90      0.89       200
weighted avg       0.90      0.90      0.90       200



# Regression

In [133]:
from sklearn.datasets import make_regression

In [135]:
# Synthetic regression data: 1000 samples, 2 features, noise=10; the seed is
# fixed so the dataset is reproducible. Rebinds X and y from the
# classification section.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=42,
)

In [137]:
# Hold out 20% of the regression samples for testing; fixed seed for a stable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [139]:
from sklearn.neighbors import KNeighborsRegressor

In [141]:
# k-nearest-neighbours regressor with k = 6.
# NOTE: despite its name, `classifier` holds a regressor here; the name is kept
# because the following prediction cell refers to it.
# `algorithm` only selects the neighbour-search structure ('auto', 'ball_tree',
# 'kd_tree' or 'brute'); it does NOT choose the distance.
# The distance is controlled by `p`, the Minkowski power parameter:
#   p=1 -> Manhattan distance, p=2 -> Euclidean distance.
# p=2 is the library default; it is spelled out here so the metric is explicit.
classifier = KNeighborsRegressor(n_neighbors=6, algorithm='auto', p=2)
classifier.fit(X_train, y_train)

In [143]:
# Predict target values for the test features; `classifier` is the
# KNeighborsRegressor fitted in the previous cell.
y_pred = classifier.predict(X_test)

In [148]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [162]:
# Report, one value per line: R^2, mean absolute error, root mean squared error.
print(
    r2_score(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
    sep='\n',
)

0.9151234135490012
9.238866237248063
11.725225806568702


In [166]:
# GridSearchCV: 5-fold cross-validated search over k = 1..10 for the KNN regressor.
param_grid = {'n_neighbors': list(range(1, 11))}

grid = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=param_grid,
    cv=5,
)
grid.fit(X_train, y_train)

In [168]:
# Predict on the test set with the fitted grid search; this overwrites the
# `y_pred` produced earlier by the fixed n_neighbors=6 regressor.
y_pred = grid.predict(X_test)

In [170]:
# Display the n_neighbors value selected by the grid search.
grid.best_params_

{'n_neighbors': 6}

In [172]:
# Display the best cross-validation score found on the training data
# (not a test-set score).
grid.best_score_

0.9262707963588784