Preprocessing

In [None]:
import numpy as np
import cv2
import os

class SimplePreprocessor:
    """Resize images to a fixed width and height.

    The aspect ratio of the input is not preserved: every image is
    stretched or squashed to exactly ``width`` x ``height`` pixels.
    """

    def __init__(self, width, height, inter=cv2.INTER_AREA):
        """Remember the target size and the interpolation method.

        Args:
            width: target image width, forwarded to ``cv2.resize``.
            height: target image height, forwarded to ``cv2.resize``.
            inter: OpenCV interpolation flag used when resizing.
        """
        self.width = width
        self.height = height
        self.inter = inter

    def preprocess(self, image):
        """Return ``image`` resized to the configured size.

        Args:
            image: the image to resize, passed straight to ``cv2.resize``.

        Returns:
            Whatever ``cv2.resize`` returns for the configured size and
            interpolation.
        """
        # cv2.resize takes the target size as (width, height)
        target_size = (self.width, self.height)
        return cv2.resize(image, target_size, interpolation=self.inter)

class SimpleDatasetLoader:
    """Load images from disk, preprocess them, and collect their labels.

    The class label of every image is the name of the directory that
    directly contains it, i.e. paths are expected to look like
    ``/path/to/dataset/{class}/{image}.jpg``.
    """

    def __init__(self, preprocessors=None):
        """Store the preprocessors applied to every loaded image.

        Args:
            preprocessors: optional list of objects exposing a
                ``preprocess(image)`` method; they are applied in order.
                ``None`` is treated as "no preprocessing".
        """
        # normalise None to an empty list so `load` can always iterate
        self.preprocessors = [] if preprocessors is None else preprocessors

    def load(self, imagePaths, verbose=-1):
        """Read, preprocess and label every image in ``imagePaths``.

        Images that OpenCV cannot read are skipped (with a warning) so
        that the returned data and labels always stay aligned.

        Args:
            imagePaths: iterable of image file paths in the
                ``.../{class}/{image}`` layout.
            verbose: when greater than zero, print a progress line every
                ``verbose`` images; any other value disables progress
                output.

        Returns:
            A tuple ``(data, labels)`` of NumPy arrays built from the
            images that could be read and their class labels.
        """
        # materialise the paths so that len() works for the progress
        # message even when a generator is passed in
        imagePaths = list(imagePaths)
        total = len(imagePaths)

        # initialize the list of features and labels
        data = []
        labels = []

        # loop over the input images
        for (i, imagePath) in enumerate(imagePaths):
            image = cv2.imread(imagePath)

            if image is None:
                # cv2.imread reports an unreadable or missing file by
                # returning None instead of raising; skip it rather than
                # crashing inside a preprocessor or storing None as data
                print("[WARN] could not read image, skipping: {}".format(
                    imagePath))
            else:
                # the class label is the name of the parent directory
                label = imagePath.split(os.path.sep)[-2]

                # apply each preprocessor, in order, to the image
                for p in self.preprocessors:
                    image = p.preprocess(image)

                # treat our processed image as a "feature vector"
                # by updating the data list followed by the labels
                data.append(image)
                labels.append(label)

            # show an update every `verbose` images
            if verbose > 0 and (i + 1) % verbose == 0:
                print("[INFO] processed {}/{}".format(i + 1, total))

        # return a tuple of the data and labels
        return (np.array(data), np.array(labels))

SGD

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from imutils import paths

In [None]:
def _log_loss_name():
    """Return the logistic-loss name understood by the installed scikit-learn.

    scikit-learn 1.1 renamed the ``"log"`` loss to ``"log_loss"`` and 1.3
    removed the old spelling, so the right string depends on the version.
    """
    import re
    import sklearn

    match = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
    if match is None:
        # unparseable version string: assume a current release
        return "log_loss"
    version = (int(match.group(1)), int(match.group(2)))
    return "log_loss" if version >= (1, 1) else "log"


def train_sgd(dataset, epochs=10, alpha=0.01, tol=1e-3):
    """Train SGD classifiers with different penalties and print accuracies.

    Loads every image under ``dataset``, resizes it to 32x32, flattens it
    into a feature vector, and fits one ``SGDClassifier`` per penalty
    (``None``, ``"l1"``, ``"l2"``) on a 75/25 train/test split.

    Args:
        dataset: root directory of the dataset, laid out as
            ``{dataset}/{class}/{image}``.
        epochs: passed to ``SGDClassifier`` as ``max_iter``.
        alpha: constant learning rate, passed as ``eta0`` (note: this is
            not the classifier's regularization strength).
        tol: stopping tolerance passed to ``SGDClassifier``.

    Raises:
        ValueError: if no images could be loaded from ``dataset``.
    """
    # grab the list of images paths
    print("[INFO] loading images...")
    imagePaths = list(paths.list_images(dataset))

    # initialize the image preprocessor and load the dataset from disk
    sp = SimplePreprocessor(32, 32)
    sdl = SimpleDatasetLoader(preprocessors=[sp])
    (data, labels) = sdl.load(imagePaths, verbose=500)

    # fail early with a clear message instead of deep inside scikit-learn
    if data.shape[0] == 0:
        raise ValueError("no images could be loaded from {!r}".format(dataset))

    # flatten every image into one row; -1 lets NumPy derive the feature
    # count so it stays correct if the preprocessor size changes
    data = data.reshape((data.shape[0], -1))

    # encode the labels as integers
    le = LabelEncoder()
    labels = le.fit_transform(labels)

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (trainX, testX, trainY, testY) = train_test_split(
        data, labels, test_size=0.25, random_state=42)

    loss = _log_loss_name()

    # loop over our set of regularizers
    for r in (None, "l1", "l2"):
        # train a SGD classifier using a logistic (log) loss and the
        # specified regularization function for at most `epochs` epochs
        print("[INFO] training model with `{}` penalty".format(r))
        model = SGDClassifier(loss=loss, penalty=r, max_iter=epochs,
            learning_rate="constant", tol=tol, eta0=alpha, random_state=42)
        model.fit(trainX, trainY)

        # evaluate the classifier
        acc = model.score(testX, testY)
        print("[INFO] `{}` penalty accuracy: {:.2f}%".format(r, acc * 100))

In [5]:
# run the penalty comparison on the animals dataset, keeping train_sgd's
# default epochs, learning rate and tolerance
train_sgd("drive/MyDrive/pyimagesearch/datasets/animals")

[INFO] loading images...
[INFO] processed 500/3000
[INFO] processed 1000/3000
[INFO] processed 1500/3000
[INFO] processed 2000/3000
[INFO] processed 2500/3000
[INFO] processed 3000/3000
[INFO] training model with `None` penalty




[INFO] `None` penalty accuracy: 55.47%
[INFO] training model with `l1` penalty




[INFO] `l1` penalty accuracy: 53.07%
[INFO] training model with `l2` penalty
[INFO] `l2` penalty accuracy: 47.60%


