In [2]:
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# !unzip "/content/drive/MyDrive/DataScience/Pyimagesearch_DeepLearningBook/Chapter7/archive.zip" -d "/content/drive/MyDrive/DataScience/Pyimagesearch_DeepLearningBook/Chapter7/"

In [4]:
import os

all_images = []

for path, subdirs, files in os.walk("/content/drive/MyDrive/DataScience/Pyimagesearch_DeepLearningBook/Chapter7/animals"):
    for name in files:
        all_images.append(os.path.join(path, name))

len(all_images)

3000

In [5]:
import cv2

class SimplePreprocessor:
  def __init__(self, width, height, inter=cv2.INTER_AREA):
    self.width = width
    self.height = height
    self.inter = inter

  def preprocess(self, image):
    return cv2.resize(image, (self.width, self.height), interpolation = self.inter)

In [6]:
import numpy as np
class SimpleDatasetLoader:
  def __init__(self, preprocessors=None):
    self.preprocessors = preprocessors

    if self.preprocessors is None:
      self.preprocessors = []

  def load(self, imagePaths, verbose=-1):
    data = []
    labels = []

    for (i, imagePath) in enumerate(imagePaths):
      image = cv2.imread(imagePath)
      label = imagePath.split(os.path.sep)[-2]

      if self.preprocessors is not None:
        for p in self.preprocessors:
          image = p.preprocess(image)

      data.append(image)
      labels.append(label)

      if verbose > 0 and i > 0 and (i+1) % verbose == 0:
        print("[INFO] processed {}/{}".format(i+1, len(imagePaths)))

    return (np.array(data), np.array(labels))


In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from imutils import paths
import argparse

In [8]:
print("[INFO] loading images..")

sp = SimplePreprocessor(32, 32)

sdl = SimpleDatasetLoader(preprocessors=[sp])

(data, labels) = sdl.load(all_images, verbose=500)

print(data.shape)

data = data.reshape((data.shape[0], 3072))

print("[INFO] feature matrix: {:.1f}MB".format(data.nbytes / (1024 * 1000.0)))

[INFO] loading images..
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000
(3000, 32, 32, 3)
[INFO] feature matrix: 9.0MB


In [11]:
le = LabelEncoder()
labels = le.fit_transform(labels)

(trainX, testX, trainY, testY) = train_test_split(data, labels, test_size=0.25, random_state=42)

In [12]:
print("[INFO] evaluating K-NN classifier..")
model = KNeighborsClassifier(n_neighbors=5, n_jobs=1)

model.fit(trainX, trainY)
print(classification_report(testY, model.predict(testX), target_names= le.classes_))

[INFO] evaluating K-NN classifier..
              precision    recall  f1-score   support

        cats       0.43      0.54      0.48       262
        dogs       0.35      0.49      0.41       249
       panda       0.92      0.28      0.43       239

    accuracy                           0.44       750
   macro avg       0.57      0.44      0.44       750
weighted avg       0.56      0.44      0.44       750

