Preprocessing

In [None]:
import cv2

In [None]:
class SimplePreprocessor:
  """Resize images to a fixed size, ignoring the aspect ratio.

  The target width and height, together with the OpenCV interpolation
  flag, are stored at construction time and reused for every image.
  """

  def __init__(self, width, height, inter=cv2.INTER_AREA):
    # remember the target dimensions and the interpolation method
    self.width, self.height = width, height
    self.inter = inter

  def preprocess(self, image):
    """Return `image` resized to (self.width, self.height)."""
    target_size = (self.width, self.height)
    return cv2.resize(image, target_size, interpolation=self.inter)

Dataset

In [None]:
import numpy as np
import cv2
import os

In [None]:
class SimpleDatasetLoader:
    """Load images from disk, preprocess them and collect them with labels.

    The class label of every image is the name of the directory that
    contains it, i.e. paths are expected to look like
    ``/path/to/dataset/{class}/{image}.jpg``.
    """

    def __init__(self, preprocessors=None):
        """Store the preprocessors applied to every loaded image.

        Args:
            preprocessors: optional list of objects exposing a
                ``preprocess(image)`` method, applied in order. ``None``
                means "no preprocessing" and is stored as an empty list.
        """
        self.preprocessors = [] if preprocessors is None else preprocessors

    def load(self, imagePaths, verbose=-1):
        """Read, preprocess and label every image in `imagePaths`.

        Files that OpenCV cannot decode are skipped with a warning so that
        one corrupt file neither aborts the run nor leaves a ``None`` entry
        in the data.

        Args:
            imagePaths: list of image file paths.
            verbose: when positive, print a progress line every `verbose`
                paths; any other value disables progress output.

        Returns:
            A tuple ``(data, labels)`` of NumPy arrays holding the
            processed images and their labels in matching order.
        """
        # initialize the list of features and labels
        data = []
        labels = []
        total = len(imagePaths)

        # loop over the input images
        for (i, imagePath) in enumerate(imagePaths):
            image = cv2.imread(imagePath)

            # cv2.imread reports an unreadable file by returning None
            # instead of raising, so it has to be checked explicitly
            if image is None:
                print("[WARN] could not read image, skipping: {}".format(
                    imagePath))
            else:
                # the label is the name of the parent directory; os.path
                # handles every separator the platform accepts
                label = os.path.basename(os.path.dirname(imagePath))

                # apply each preprocessor in order (the list may be empty)
                for p in self.preprocessors:
                    image = p.preprocess(image)

                # treat our processed image as a "feature vector"
                # by updating the data list followed by the labels
                data.append(image)
                labels.append(label)

            # show an update every `verbose` paths (counting skipped files,
            # and including the very first path when verbose == 1)
            if verbose > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, total))

        # return a tuple of the data and labels
        return (np.array(data), np.array(labels))

KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths

In [None]:
def train_knn(dataset, neighbors=1, jobs=1):
    """Train and evaluate a k-NN classifier on raw pixel intensities.

    Every image found under `dataset` is resized to 32x32 and flattened
    into a feature vector. The data is split 75%/25% into training and
    testing sets (fixed random_state of 42) and a classification report
    for the testing set is printed.

    Args:
        dataset: root directory of the image dataset.
        neighbors: forwarded to KNeighborsClassifier as `n_neighbors`.
        jobs: forwarded to KNeighborsClassifier as `n_jobs`.

    Raises:
        ValueError: if no images are found under `dataset`.
    """
    # grab the list of images that we'll be describing
    print("[INFO] loading images...")
    imagePaths = list(paths.list_images(dataset))

    # fail early with a clear message instead of a confusing error later
    # from the train/test split
    if not imagePaths:
        raise ValueError("no images found under '{}'".format(dataset))

    # initialize the image preprocessor, load the dataset from disk, and reshape the data matrix
    width = 32
    height = 32
    channels = 3
    sp = SimplePreprocessor(width, height)
    sdl = SimpleDatasetLoader(preprocessors=[sp])
    (data, labels) = sdl.load(imagePaths, verbose=500)
    data = data.reshape((data.shape[0], width * height * channels))

    # show some information on memory consumption of the images
    print("[INFO] features matrix: {:.1f}MB".format(data.nbytes / (1024 * 1024.0)))

    # encode the labels as integers
    le = LabelEncoder()
    labels = le.fit_transform(labels)

    # partition the data into training and testing splits using 75% of the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)

    # train and evaluate a k-NN classifier on the raw pixel intensities
    print("[INFO] evaluating k-NN classifier...")
    model = KNeighborsClassifier(n_neighbors=neighbors, n_jobs=jobs)
    model.fit(trainX, trainY)  # train classifier
    predictions = model.predict(testX)

    # pass the full set of encoded labels explicitly: without it the report
    # raises when a class is absent from both testY and the predictions,
    # because target_names would then have more entries than labels found
    class_ids = list(range(len(le.classes_)))
    print(classification_report(testY, predictions, labels=class_ids,
                                target_names=le.classes_))  # evaluate classifier

In [None]:
# run the k-NN experiment on the animals dataset with the default
# settings (neighbors=1, jobs=1); the report is printed, nothing is returned
train_knn("drive/MyDrive/pyimagesearch/datasets/animals")

[INFO] loading images...
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000
[INFO] features matrix: 8.8MB
[INFO] evaluating k-NN classifier...
              precision    recall  f1-score   support

        cats       0.38      0.59      0.46       239
        dogs       0.44      0.44      0.44       262
       panda       0.81      0.36      0.50       249

    accuracy                           0.46       750
   macro avg       0.54      0.46      0.47       750
weighted avg       0.54      0.46      0.47       750

