# KNN image classification on CIFAR-10

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os

# Load the training split: every sub-directory of dir_train is treated as one
# class, and the directory name is used as the label of each image inside it.
X_tr = []
Y_tr = []
dir_train = './Data/train/'
# os.listdir returns entries in arbitrary order; sorting makes the sample
# order (and therefore tie-breaking between equidistant neighbours)
# reproducible across runs and machines.
for subdir in sorted(os.listdir(dir_train)):
    class_dir = os.path.join(dir_train, subdir)
    # Skip hidden entries and stray files (e.g. .DS_Store,
    # .ipynb_checkpoints): they are not classes, and calling os.listdir on a
    # plain file would raise NotADirectoryError.
    if subdir.startswith('.') or not os.path.isdir(class_dir):
        continue
    for file in sorted(os.listdir(class_dir)):
        if file.startswith('.'):
            # Hidden files are not images; plt.imread would fail on them.
            continue
        img = plt.imread(os.path.join(class_dir, file))
        label = subdir
        X_tr.append(img)
        Y_tr.append(label)

X_train = np.array(X_tr)
y_train = np.array(Y_tr)

print(f'Shape of training data: {X_train.shape}')
print(f'Shape of training Label: {y_train.shape}')

In [None]:
# Load the test split: every sub-directory of dir_test is treated as one
# class, and the directory name is used as the label of each image inside it.
X_te = []
Y_te = []
dir_test = './Data/test/'
# os.listdir returns entries in arbitrary order; sorting makes the sample
# order reproducible across runs and machines.
for subdir in sorted(os.listdir(dir_test)):
    class_dir = os.path.join(dir_test, subdir)
    # Skip hidden entries and stray files (e.g. .DS_Store,
    # .ipynb_checkpoints): they are not classes, and calling os.listdir on a
    # plain file would raise NotADirectoryError.
    if subdir.startswith('.') or not os.path.isdir(class_dir):
        continue
    for file in sorted(os.listdir(class_dir)):
        if file.startswith('.'):
            # Hidden files are not images; plt.imread would fail on them.
            continue
        img = plt.imread(os.path.join(class_dir, file))
        label = subdir
        X_te.append(img)
        Y_te.append(label)

X_test = np.array(X_te)
y_test = np.array(Y_te)

print(f'Shape of testing data: {X_test.shape}')
print(f'Shape of testing Label: {y_test.shape}')

In [None]:
class KNN(object):
    """K-nearest-neighbour classifier with a pixel-sum distance.

    The distance between two samples is the absolute difference between the
    sums of all their values, i.e. ``|sum(a) - sum(b)|``. This is a very
    coarse measure (it ignores where the values are located inside the
    sample); it is NOT the element-wise L1 distance ``sum(|a - b|)``.
    """

    def __init__(self):
        pass

    def train(self, X, y):
        """Memorise the training samples and their labels.

        Args:
            X: array whose first axis indexes the training samples.
            y: array of labels, one per training sample.
        """
        self.X_train = X
        self.y_train = y

    def predict(self, X, k=1, num_loops=0):
        """Predict a label for every sample in ``X``.

        Args:
            X: array whose first axis indexes the samples to classify.
            k: number of nearest neighbours that vote for the label.
            num_loops: implementation selector; only ``0`` is supported.

        Returns:
            A list with one predicted label per sample in ``X``.

        Raises:
            ValueError: if ``num_loops`` is not ``0`` or ``k`` is below 1.
        """
        if num_loops != 0:
            raise ValueError(f'Invalid value {num_loops} for num_loops')
        dists = self.compute_distances(X)
        return self.predict_labels(dists, k=k)

    @staticmethod
    def _pixel_sums(samples):
        """Return the sum of all values of each sample along the first axis."""
        arr = np.asarray(samples)
        # Accumulate in a signed/float type: summing uint8 images yields an
        # unsigned result whose later subtraction wraps around, and float
        # images must not be squeezed into an integer type.
        if np.issubdtype(arr.dtype, np.floating):
            acc_dtype = np.float64
        else:
            acc_dtype = np.int64
        return arr.sum(axis=tuple(range(1, arr.ndim)), dtype=acc_dtype)

    def compute_distances(self, X):
        """Compute the distance from every sample in ``X`` to every training sample.

        Args:
            X: array whose first axis indexes the test samples.

        Returns:
            Array of shape ``(num_test, num_train)`` where entry ``[i, j]`` is
            ``|sum(X[i]) - sum(X_train[j])|``. The dtype is int64 for integer
            inputs and float64 when either input is floating point, so float
            distances are no longer truncated to whole numbers.
        """
        test_sums = self._pixel_sums(X)
        train_sums = self._pixel_sums(self.X_train)
        # Each sum is computed once and the pairwise differences are formed
        # in a single vectorised step instead of a Python double loop.
        dists = np.subtract.outer(test_sums, train_sums)
        # In-place abs avoids a second (num_test, num_train) temporary.
        np.abs(dists, out=dists)
        return dists

    def predict_labels(self, dists, k=1):
        """Pick the majority label among the ``k`` nearest training samples.

        Args:
            dists: distance matrix of shape ``(num_test, num_train)``.
            k: number of nearest neighbours that vote; must be at least 1.

        Returns:
            A list with one predicted label per row of ``dists``. When several
            labels share the highest vote count, the one whose first voter is
            nearest wins.

        Raises:
            ValueError: if ``k`` is below 1 (there would be no voters).
        """
        if k < 1:
            raise ValueError(f'Invalid value {k} for k')
        labels = np.asarray(self.y_train)
        y_pred = []
        for row in dists:
            # A stable sort breaks distance ties by training index, so the
            # prediction is deterministic.
            nearest = np.argsort(row, kind='stable')[:k]
            closest_y = list(labels[nearest])
            # max() returns the first label reaching the top count, and
            # closest_y is ordered nearest-first.
            y_pred.append(max(closest_y, key=closest_y.count))
        return y_pred

In [None]:
# Build the classifier, hand it the training split, and compute the
# test-to-train distance matrix.
classifier = KNN()
classifier.train(X=X_train, y=y_train)
dists = classifier.compute_distances(X=X_test)

In [None]:
K_values = []
Accuracy_values = []
for j in range(1, 21):
    y_test_pred = classifier.predict_labels(dists, k=j)
    num_correct = 0
    for i in range(len(y_test_pred)):
        if y_test[i] == y_test_pred[i]:
            num_correct += 1
    accuracy = float((num_correct) * 2 * 100) / len(y_test_pred)
    K_values.append(j)
    Accuracy_values.append(accuracy)

print(len(K_values))
    
for i in range(20):
    print(f'Accuracy for K value of {K_values[i]} is: {Accuracy_values[i]}')

In [None]:
X = np.array(K_values)
Y = np.array(Accuracy_values)
plt.plot(X, Y, 'go', linewidth=2, markersize=5)
plt.ylim([20, 30])
plt.xlim([0, 21])
plt.xlabel('K_values')
plt.ylabel('Accuracy(%)')
plt.grid()
plt.show()

# Result Analysis

The abbreviation KNN stands for “K-Nearest Neighbours”. It is a supervised machine learning algorithm that can be used to solve both classification and regression problems. It was developed by Evelyn Fix and Joseph Hodges in 1951. On suitable data, the KNN algorithm can compete with the most accurate models, so it can be used for applications that require high accuracy but do not require a human-readable model. The quality of the predictions, however, depends on the distance measure. This implementation is an attempt to classify the CIFAR-10 image dataset, using the absolute difference between the pixel sums of two images as the distance, which ignores where the pixels are located in the image. With this implementation, the accuracy is not satisfactory. So we can say that KNN (K-Nearest Neighbours), at least with such a simple distance measure, is not a good fit for image classification.