Importing Relevant Libraries

In [1]:
import os
import numpy as np
import cv2 as cv
import codecs
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

Loading the Images and the Labels

In [2]:
datapath = 'C:/Users/DELL/OneDrive - City University of Hong Kong - Student/Desktop/OpenCV/MNIST_Data/'
def load():
    sets = ['train', 'test']
    data_dict = {}
    for set in sets:
        images = []
        labels = []
        for i in range(10):
            # READ ALL THE IMAGES IN THE FOLDER
            for file in os.listdir(datapath+set+'/'+str(i)):
                image = cv.imread(datapath+set+'/'+str(i)+'/'+file, 0)
                image = image.astype('float32')
                images.append(image)
                labels.append(i)
        # SAVE THE IMAGES AND LABELS TO A CORRESPONDING KEY
        data_dict[set+'_images'] = np.array(images)
        data_dict[set+'_labels'] = np.array(labels)
    return data_dict

Displaying an Image

In [3]:
def display():
    data_dict = load()
    images = data_dict['train_images']
    labels = data_dict['train_labels']
    no_of_samples = images.shape[0]
    for i in range(10):
        # RANDOMLY SELECT A SAMPLE
        index = np.random.randint(0, no_of_samples)
        image = images[index]
        label = labels[index]
        # DISPLAY THE IMAGE AND LABEL
        plt.imshow(image, cmap='gray')
        plt.title('Label: '+str(label))
        plt.show()

Reshape the images to 1D array

In [4]:
def reshape():
    sets = ['train', 'test']
    data_dict = load()
    for set in sets:
        images = data_dict[set+'_images']
        no_of_samples = images.shape[0]
        # RESHAPE THE IMAGES TO [NO_OF_SAMPLES x HEIGHT*WIDTH]
        data_dict[set+'_images'] = images.reshape(no_of_samples, -1)
    return data_dict

Normalizing the pixel values

In [5]:
def normalize():
    sets = ['train', 'test']
    data_dict = reshape()
    for set in sets:
        images = data_dict[set+'_images']
        # NORMALIZE THE PIXEL VALUES TO [0, 1]
        data_dict[set+'_images'] = images/255
    return data_dict

Training the Model

In [6]:
def train():
    data_dict = normalize()
    train_images = data_dict['train_images']
    train_labels = data_dict['train_labels']
    # CREATE THE MODEL
    model = KNeighborsClassifier(n_neighbors=5)
    # TRAIN THE MODEL
    model.fit(train_images, train_labels)
    return model

Testing the Model

In [7]:
def test():
    data_dict = normalize()
    test_images = data_dict['test_images']
    test_labels = data_dict['test_labels']
    # LOAD THE TRAINED MODEL
    model = train()
    # PREDICT THE LABELS
    predicted_labels = model.predict(test_images)
    # CALCULATE THE ACCURACY
    accuracy = accuracy_score(test_labels, predicted_labels)
    print('Accuracy: ', accuracy)

Predict the Label of a Single Image

In [8]:
def predict():
    data_dict = normalize()
    train_images = data_dict['train_images']
    train_labels = data_dict['train_labels']
    # LOAD THE TRAINED MODEL
    model = train()
    # READ THE IMAGE
    image = cv.imread('image.jpg', 0)
    # RESHAPE THE IMAGE TO [1 x HEIGHT*WIDTH]
    image = image.reshape(1, -1)
    # NORMALIZE THE PIXEL VALUES TO [0, 1]
    image = image/255
    # PREDICT THE LABEL
    predicted_label = model.predict(image)
    print('Predicted Label: ', predicted_label)

Main Method to Call functions

In [9]:
def main():
    display()
    test()

if __name__ == '__main__':
    main()

AttributeError: 'NoneType' object has no attribute 'astype'