# k-Nearest Neighbors Classifier - Implementation on a Synthetic Dataset

In [6]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [2]:
# Build the synthetic binary-classification dataset: 1000 samples with
# 3 features, all of them informative (none redundant). The fixed
# random_state makes the generated data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    random_state=42,
)

In [3]:
# Hold out 20% of the samples as the test set; the fixed random_state
# keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Sample Training

In [4]:
# Baseline model: k-NN with only n_jobs set (every other constructor
# argument is left at its default), fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
sample_model = KNeighborsClassifier(n_jobs=-1).fit(X_train, y_train)

# Baseline predictions for the held-out test split.
y_pred = sample_model.predict(X_test)

### Sample Model Scores

In [7]:
# Report each metric for the predictions currently bound to y_pred,
# printing the label followed by the metric's value.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:", confusion_matrix),
    ("Classification Report:", classification_report),
):
    print(label, metric(y_test, y_pred))

Accuracy: 0.945
Confusion Matrix: [[103   7]
 [  4  86]]
Classification Report:               precision    recall  f1-score   support

           0       0.96      0.94      0.95       110
           1       0.92      0.96      0.94        90

    accuracy                           0.94       200
   macro avg       0.94      0.95      0.94       200
weighted avg       0.95      0.94      0.95       200



## Hyperparameter Tuning

In [12]:
# Search space for GridSearchCV: one list of candidate values per
# KNeighborsClassifier constructor argument
# (10 * 2 * 3 * 6 * 2 * 1 = 720 combinations in total).
n_neighbors = [*range(1, 11)]       # 1, 2, ..., 10
weights = ['uniform', 'distance']
# NOTE(review): algorithm and leaf_size presumably affect only the speed of
# the neighbor search, not the predictions -- confirm they are worth searching.
algorithm = ['auto', 'ball_tree', 'kd_tree']
leaf_size = [*range(5, 31, 5)]      # 5, 10, ..., 30
p = [1, 2]
# NOTE(review): a single candidate value adds nothing to the search; confirm
# n_jobs belongs in the grid rather than on the estimator itself.
n_jobs = [-1]

param_grid = {
    'n_neighbors': n_neighbors,
    'weights': weights,
    'algorithm': algorithm,
    'leaf_size': leaf_size,
    'p': p,
    'n_jobs': n_jobs,
}

In [13]:
# Grid search over param_grid, starting from a KNeighborsClassifier built
# with no arguments; n_jobs=-1 here is passed to the search itself.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    n_jobs=-1,
)

In [14]:
# Run the search on the training split only; the test split is not passed here.
grid.fit(X_train, y_train)

In [15]:
# Parameter combination selected by the fitted grid search
# (displayed as this cell's output).
grid.best_params_

{'algorithm': 'auto',
 'leaf_size': 5,
 'n_jobs': -1,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [19]:
# Keep a handle on the estimator exposed by the fitted search as best_estimator_.
best_model = grid.best_estimator_

In [20]:
# Predict test labels with the tuned model. Note: this rebinds y_pred,
# replacing the baseline model's predictions assigned in the earlier cell.
y_pred = best_model.predict(X_test)

### Model Scores

In [21]:
# Report each metric for the predictions currently bound to y_pred,
# printing the label followed by the metric's value.
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Confusion Matrix:", confusion_matrix),
    ("Classification Report:", classification_report),
):
    print(label, metric(y_test, y_pred))

Accuracy: 0.945
Confusion Matrix: [[103   7]
 [  4  86]]
Classification Report:               precision    recall  f1-score   support

           0       0.96      0.94      0.95       110
           1       0.92      0.96      0.94        90

    accuracy                           0.94       200
   macro avg       0.94      0.95      0.94       200
weighted avg       0.95      0.94      0.95       200

