In [70]:
#SimplePreprocessor:
import cv2
class SimplePreprocessor():
    """Resize images to a fixed width and height, ignoring the aspect ratio."""

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        """Store the target size and the interpolation method.

        Args:
            width: target width handed to cv2.resize.
            height: target height handed to cv2.resize.
            inter: OpenCV interpolation flag used when resizing
                (defaults to cv2.INTER_AREA).
        """
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        """Return `image` resized to (self.width, self.height).

        The aspect ratio of the input is not preserved.
        """
        return cv2.resize(image, (self.width, self.height), interpolation=self.inter)

    def proprocess(self, image):
        """Backward-compatible alias for preprocess().

        `proprocess` is the original (misspelled) method name; it is kept so
        that existing callers keep working.
        """
        return self.preprocess(image)

In [71]:
#SimpleDatasetLoader:
import cv2
import numpy as np
import os
class SimpleDatasetLoader():
    """Load images from disk, apply optional preprocessors to each of them and
    collect the results together with their class labels."""

    def __init__(self, preprocessors = None):
        """Store the preprocessors to apply to every loaded image.

        Args:
            preprocessors: optional list of objects exposing a
                `preprocess(image)` method (the legacy spelling
                `proprocess(image)` is accepted too). They are applied in
                order. None means "no preprocessing".
        """
        #if preprocessors are None, initialize them as an empty list:
        self.preprocessors = [] if preprocessors is None else preprocessors

    @staticmethod
    def _apply(preprocessor, image):
        """Run a single preprocessor, accepting either method spelling."""
        method = getattr(preprocessor, 'preprocess', None)
        if method is None:
            #fall back to the legacy (misspelled) method name:
            method = preprocessor.proprocess
        return method(image)

    def load(self, imagePaths, verbose=-1):
        """Read every image in `imagePaths` and return (data, labels).

        Args:
            imagePaths: sequence of image paths laid out as
                /path/to/dataset/{class}/{image}.jpg; the name of the
                directory containing the file is used as the label.
            verbose: when > 0, print a progress line every `verbose` paths.

        Returns:
            A tuple (np.array(data), np.array(labels)) holding the
            preprocessed images and their labels, in input order. Paths that
            cv2.imread cannot read are skipped (a [WARN] line is printed), so
            both arrays stay aligned with each other.
        """
        #initialize the list of features and labels:
        data = []
        labels = []

        #loop over the input images:
        for i, imagePath in enumerate(imagePaths):
            #cv2.imread does not raise on a missing/corrupt file, it returns None:
            image = cv2.imread(imagePath)

            if image is None:
                print(f'[WARN] could not read image, skipping: {imagePath}')
            else:
                #the label is the name of the directory containing the image;
                #os.path copes with the separators of the running platform:
                label = os.path.basename(os.path.dirname(imagePath))

                #tolerate the attribute having been reset to None after construction:
                if self.preprocessors is not None:
                    #apply each preprocessor in turn:
                    for p in self.preprocessors:
                        image = self._apply(p, image)

                #treat the preprocessed image as a "feature vector"
                #by updating the data list followed by the labels:
                data.append(image)
                labels.append(label)

            #progress counts every path visited, including skipped ones:
            if verbose>0 and (i+1)%verbose==0:
                print(f'[INFO] processed {i+1}/{len(imagePaths)}')
        return (np.array(data), np.array(labels))

In [72]:
#Train k-nn:

#Step 1: collect dataset:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
#import argparse

#grab the list of images; backslashes are normalised to '/' so every path
#uses the same separator regardless of platform:
print('[INFO] loading images...')
DATASET_DIR = 'animals/'
IMAGE_WIDTH, IMAGE_HEIGHT = 32, 32
imagePaths = [p.replace('\\', '/') for p in paths.list_images(DATASET_DIR)]
#fail early with a clear message instead of a cryptic error further down:
assert len(imagePaths) > 0, f'no images found under {DATASET_DIR!r}'

#initialize the image preprocessor and load the dataset from disk:
sp = SimplePreprocessor(IMAGE_WIDTH, IMAGE_HEIGHT)
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(imagePaths, verbose=500)
#flatten every image into one feature row; -1 lets numpy infer the row length
#(3072 for the 32x32 size used here, the value that was previously hard-coded),
#so changing the image size no longer breaks this line:
data = data.reshape((data.shape[0], -1))

#show some information on the memory consumption of the images.
#NOTE: the divisor 1024*1000 is neither 10^6 (MB) nor 2^20 (MiB); it is kept
#unchanged so the printed figure stays comparable with earlier runs:
print(f'[INFO] features matrix: {data.nbytes/(1024*1000.0)}MB')

[INFO] loading images...
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000
[INFO] features matrix: 9.0MB


In [73]:
#Step 2: Split datasets:
#encode the string labels as integers. The encoded labels get their own name so
#that `labels` keeps the original strings: re-running this cell then re-fits the
#encoder on the same strings instead of on already-encoded integers (which
#would replace the class names in le.classes_ with numbers):
le = LabelEncoder()
encodedLabels = le.fit_transform(labels)

#split the data into training (75%) and testing (25%) sets:
trainX, testX, trainY, testY = train_test_split(data, encodedLabels, test_size=0.25, random_state=42)

In [76]:
#Step 3: Train and evaluate classifier:
print('[INFO] evaluating k-NN classifier...')
#build a k-NN classifier with its default settings and fit it on the training
#split (fit() returns the fitted estimator itself):
model = KNeighborsClassifier().fit(trainX, trainY)
#print per-class metrics for the predictions on the test split, labelling the
#rows with the class names held by the label encoder:
print(
    classification_report(
        testY,
        model.predict(testX),
        target_names=le.classes_,
    )
)

[INFO] evaluating k-NN classifier...
              precision    recall  f1-score   support

        cats       0.42      0.61      0.50       262
        dogs       0.39      0.47      0.42       249
       panda       0.91      0.27      0.41       239

    accuracy                           0.45       750
   macro avg       0.57      0.45      0.44       750
weighted avg       0.57      0.45      0.45       750



              precision    recall  f1-score   support

        cats       0.42      0.61      0.50       262
        dogs       0.39      0.47      0.42       249
       panda       0.91      0.27      0.41       239

    accuracy                           0.45       750
   macro avg       0.57      0.45      0.44       750
weighted avg       0.57      0.45      0.45       750



In [74]:
#inspect the class names stored on the fitted label encoder `le`:
le.classes_

array(['cats', 'dogs', 'panda'], dtype='<U5')