# K-Nearest Neighbours for Classification

In [1]:
# Importing Libraries
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from itertools import product
from sklearn.metrics import classification_report, accuracy_score
import tabulate

In [2]:
# Preparing 'Iris' Dataset
iris = load_iris()
print("Independent Variables")
print(iris.feature_names)
print(iris.data.shape)

print("Dependent Variables")
print(iris.target_names)
print(iris.target.shape)

Independent Variables
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
(150, 4)
Dependent Variables
['setosa' 'versicolor' 'virginica']
(150,)


In [3]:
# Performing Data Pre-Processing
sc = StandardScaler()
X = sc.fit_transform(iris.data)
Y = iris.target

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33)
print(X_train.shape)

(100, 4)


In [4]:
# Preparing Parameters
K_values = [5, 7, 9] # 5 <= k <= sqrt(X_train.shape[0])
P_values = [1,2] # Distance Metrics
table = []

In [5]:
# K-Nearest Neighbours Algorithm
for k, dm in list(product(K_values, P_values)):
    classifier = KNeighborsClassifier(n_neighbors = k, p = dm)
    classifier.fit(X_train, Y_train)
    Y_pred = classifier.predict(X_test)
    print("\nClassification Report for k = {}, p = {}".format(k, dm))
    print(classification_report(Y_test, Y_pred, target_names=iris.target_names))
    accuracy = round(accuracy_score(Y_test, Y_pred),2)
    table.append((k, dm, accuracy))


Classification Report for k = 5, p = 1
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.88      0.93      0.90        15
   virginica       0.96      0.92      0.94        25

    accuracy                           0.94        50
   macro avg       0.94      0.95      0.95        50
weighted avg       0.94      0.94      0.94        50


Classification Report for k = 5, p = 2
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.88      0.93      0.90        15
   virginica       0.96      0.92      0.94        25

    accuracy                           0.94        50
   macro avg       0.94      0.95      0.95        50
weighted avg       0.94      0.94      0.94        50


Classification Report for k = 7, p = 1
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versico

In [6]:
# Tabulation of Results
headers = ['Neighbours', 'Value of p', 'Accuracy']
tabulate.tabulate(table, headers, tablefmt='html', numalign='center', showindex='always')

Unnamed: 0,Neighbours,Value of p,Accuracy
0,5,1,0.94
1,5,2,0.94
2,7,1,0.92
3,7,2,0.88
4,9,1,0.92
5,9,2,0.9
