**Use GPU: Runtime -> Change runtime type -> GPU (Hardware Accelerator)**

Setup

In [2]:
# Print the Keras configuration file from the home directory so the
# settings it contains are visible in the notebook output
!cat ~/.keras/keras.json

{
    "epsilon": 1e-07, 
    "floatx": "float32", 
    "image_data_format": "channels_last", 
    "backend": "tensorflow"
}

In [5]:
# Record the installed Keras version so the environment this notebook
# was run with is visible in its output
import keras
print(keras.__version__)

2.4.3


HDF5

In [None]:
# import the necessary packages
from keras.utils import np_utils
import numpy as np
import h5py

class HDF5DatasetGenerator:
    """Yield mini-batches of images and labels read from an HDF5 database.

    The database is expected to contain an ``"images"`` and a ``"labels"``
    dataset; both are sliced along their first axis in steps of
    ``batchSize``. The instance can be used as a context manager so the
    underlying file is closed even if the consumer raises.
    """

    def __init__(self, dbPath, batchSize, preprocessors=None,
        aug=None, binarize=True, classes=2):
        """Open the database at ``dbPath`` read-only and store the settings.

        Args:
            dbPath: path of the HDF5 file to read.
            batchSize: number of entries per yielded batch.
            preprocessors: optional iterable of objects exposing a
                ``preprocess(image)`` method, applied in order to every
                image.
            aug: optional augmentor exposing
                ``flow(images, labels, batch_size=...)`` that returns an
                iterator of ``(images, labels)`` batches (presumably a
                Keras ``ImageDataGenerator`` -- confirm against callers).
            binarize: when true, labels are one-hot encoded with
                ``np_utils.to_categorical``.
            classes: number of classes used for the one-hot encoding.
        """
        # store the batch size, preprocessors, and data augmentor,
        # whether or not the labels should be binarized, along with
        # the total number of classes
        self.batchSize = batchSize
        self.preprocessors = preprocessors
        self.aug = aug
        self.binarize = binarize
        self.classes = classes

        # open the HDF5 database explicitly in read-only mode (the
        # default mode differs between h5py releases and older ones
        # would create/modify the file) and determine the total number
        # of entries in the database
        self.db = h5py.File(dbPath, "r")
        self.numImages = self.db["labels"].shape[0]

    def __enter__(self):
        """Return the generator itself for use in a ``with`` statement."""
        return self

    def __exit__(self, excType, excValue, traceback):
        """Close the database when leaving the ``with`` block."""
        self.close()

    def generator(self, passes=np.inf):
        """Yield ``(images, labels)`` batches for ``passes`` epochs.

        Args:
            passes: number of full passes over the database; the default
                ``np.inf`` loops forever.

        Yields:
            A tuple ``(images, labels)`` for each batch. The last batch of
            an epoch may hold fewer than ``batchSize`` entries.
        """
        # initialize the epoch count
        epochs = 0

        # keep looping infinitely -- the model will stop once we have
        # reached the desired number of epochs
        while epochs < passes:
            # loop over the HDF5 dataset
            for i in np.arange(0, self.numImages, self.batchSize):
                # extract the images and labels from the HDF dataset
                images = self.db["images"][i: i + self.batchSize]
                labels = self.db["labels"][i: i + self.batchSize]

                # check to see if the labels should be binarized
                if self.binarize:
                    labels = np_utils.to_categorical(labels,
                        self.classes)

                # check to see if our preprocessors are not None
                if self.preprocessors is not None:
                    # initialize the list of processed images
                    procImages = []

                    # loop over the images
                    for image in images:
                        # loop over the preprocessors and apply each
                        # to the image
                        for p in self.preprocessors:
                            image = p.preprocess(image)

                        # update the list of processed images
                        procImages.append(image)

                    # update the images array to be the processed
                    # images
                    images = np.array(procImages)

                # if the data augmentor exists, apply it
                if self.aug is not None:
                    (images, labels) = next(self.aug.flow(images,
                        labels, batch_size=self.batchSize))

                # yield a tuple of images and labels
                yield (images, labels)

            # increment the total number of epochs
            epochs += 1

    def close(self):
        """Close the underlying HDF5 database."""
        # close the database
        self.db.close()

Extract Features

In [None]:
# import the necessary packages
from keras.applications import ResNet50
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from imutils import paths
import numpy as np
import progressbar
import random
import os

In [None]:
def extract_features(dataset_filepath, output_filepath, batch_size=16, buffer_size=1000):
    """Extract ResNet50 features for every image and write them to HDF5.

    Images found under ``dataset_filepath`` are shuffled, resized to
    224x224, passed through an ImageNet-pretrained ResNet50 without its
    classification head, and the flattened outputs are stored together
    with their encoded labels under the ``"features"`` key.

    Args:
        dataset_filepath: directory searched for images.
        output_filepath: path of the HDF5 file to write.
        batch_size: number of images passed through the network at once.
        buffer_size: passed to the dataset writer as ``bufSize``.

    NOTE(review): ``HDF5DatasetWriter`` is not defined in the visible part
    of this notebook -- it must be defined or imported before this runs.
    """
    # store the batch size in a convenience variable
    bs = batch_size

    # length of one flattened feature vector: for a 224x224 input the
    # headless ResNet50 emits a 7 x 7 x 2048 volume (= 100352 values)
    featureDim = 7 * 7 * 2048

    # grab the list of images that we'll be describing then randomly
    # shuffle them to allow for easy training and testing splits via
    # array slicing during training time
    print("[INFO] loading images...")
    imagePaths = list(paths.list_images(dataset_filepath))
    random.shuffle(imagePaths)

    # extract the class labels from the image paths (the part of the
    # file name before the first ".") then encode the labels
    labels = [p.split(os.path.sep)[-1].split(".")[0] for p in imagePaths]
    le = LabelEncoder()
    labels = le.fit_transform(labels)

    # load the ResNet50 network
    print("[INFO] loading network...")
    model = ResNet50(weights="imagenet", include_top=False)

    # initialize the HDF5 dataset writer, then store the class label
    # names in the dataset
    dataset = HDF5DatasetWriter((len(imagePaths), featureDim), output_filepath, dataKey="features", bufSize=buffer_size)

    # from here on the writer is open: make sure it is always closed,
    # even when an image fails to load or the network raises
    try:
        dataset.storeClassLabels(le.classes_)

        # initialize the progress bar
        widgets = ["Extracting Features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
        pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start()

        # loop over the images in batches
        for i in np.arange(0, len(imagePaths), bs):
            # extract the batch of images and labels, then initialize the
            # list of actual images that will be passed through the network
            # for feature extraction
            batchPaths = imagePaths[i:i + bs]
            batchLabels = labels[i:i + bs]
            batchImages = []

            # loop over the images in the current batch
            for imagePath in batchPaths:
                # load the input image using the Keras helper utility
                # while ensuring the image is resized to 224x224 pixels
                image = load_img(imagePath, target_size=(224, 224))
                image = img_to_array(image)

                # preprocess the image by (1) expanding the dimensions and
                # (2) subtracting the mean RGB pixel intensity from the
                # ImageNet dataset
                image = np.expand_dims(image, axis=0)
                image = imagenet_utils.preprocess_input(image)

                # add the image to the batch
                batchImages.append(image)

            # pass the images through the network and use the outputs as
            # our actual features
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size=bs)

            # reshape the features so that each image is represented by
            # a flattened feature vector of the network's final output
            # volume
            features = features.reshape((features.shape[0], featureDim))

            # add the features and labels to our HDF5 dataset
            dataset.add(features, batchLabels)
            pbar.update(i)
    finally:
        # close the dataset
        dataset.close()

    pbar.finish()

In [None]:
# Extract features from the Kaggle Dogs vs. Cats training images and
# store them in an HDF5 file (both paths are relative to the current
# working directory)
extract_features(
    dataset_filepath="drive/MyDrive/pyimagesearch/datasets/kaggle-dogs-vs-cats/train",
    output_filepath="drive/MyDrive/pyimagesearch/output/26-kaggle-dogs-vs-cats/hdf5/features.hdf5",
)

Train Model

In [None]:
# import the necessary packages
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
import pickle
import h5py

In [None]:
def train_model(db_filepath, model_filepath, jobs=-1):
    """Grid-search a logistic regression on stored features and save it.

    The first 75% of the entries in the HDF5 database are used for
    training and the remaining 25% for evaluation; this relies on the
    data having been shuffled before it was written.

    Args:
        db_filepath: HDF5 file holding the ``"features"``, ``"labels"``
            and ``"label_names"`` datasets.
        model_filepath: destination of the pickled best estimator.
        jobs: number of parallel jobs for the grid search (``-1`` uses
            all processors).
    """
    # open the HDF5 database for reading then determine the index of
    # the training and testing split, provided that this data was
    # already shuffled *prior* to writing it to disk; the `with` block
    # closes the database even if training or evaluation raises
    with h5py.File(db_filepath, "r") as db:
        i = int(db["labels"].shape[0] * 0.75)

        # define the set of parameters that we want to tune then start a
        # grid search where we evaluate our model for each value of C
        print("[INFO] tuning hyperparameters...")
        params = {"C": [0.0001, 0.001, 0.01, 0.1, 1.0]}
        model = GridSearchCV(LogisticRegression(solver="lbfgs", multi_class="auto"), params, cv=3, n_jobs=jobs)
        model.fit(db["features"][:i], db["labels"][:i])
        print("[INFO] best hyperparameters: {}".format(model.best_params_))

        # read the held-out labels once; they are needed for both the
        # report and the accuracy score
        testLabels = db["labels"][i:]

        # the label names may come back as bytes depending on the h5py
        # version, which the report cannot format -- normalize to str
        targetNames = [n.decode("utf-8") if isinstance(n, bytes) else str(n)
            for n in db["label_names"][:]]

        # generate a classification report for the model
        print("[INFO] evaluating...")
        preds = model.predict(db["features"][i:])
        print(classification_report(testLabels, preds, target_names=targetNames))

        # compute the raw accuracy with extra precision
        acc = accuracy_score(testLabels, preds)
        print("[INFO] score: {}".format(acc))

    # serialize the model to disk
    print("[INFO] saving model...")
    with open(model_filepath, "wb") as f:
        f.write(pickle.dumps(model.best_estimator_))

    print("[INFO] done.")

In [None]:
# Train the logistic regression on the HDF5 features file and pickle
# the best estimator to the output path
train_model(
    db_filepath="drive/MyDrive/pyimagesearch/output/26-kaggle-dogs-vs-cats/hdf5/features.hdf5",
    model_filepath="drive/MyDrive/pyimagesearch/output/26-kaggle-dogs-vs-cats/kaggle-dogs-vs-cats.pickle",
)