In [7]:
# basic packages
import glob
import h5py
import numpy as np
import os
import matplotlib.pyplot as plt
import progressbar
import random
from imutils import paths

# sklearn
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# keras functions
from tensorflow.keras import backend
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import add
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical

In [5]:
# create a class for a mini version of VGGNet (Simonyan and Zisserman, 2015)
class MiniVGGNet:
    """Builder for a small VGG-style Sequential network.

    The network is two stages of (CONV => RELU => BN) * 2 => POOL => DROPOUT
    with 32 and then 64 3x3 filters, followed by a 512-unit fully-connected
    layer and a softmax classifier.
    """

    @staticmethod
    def build(height, width, depth, classes):
        """Create the (uncompiled) MiniVGGNet model.

        Note the argument order is (height, width, ...), whereas the other
        builders in this file take (width, height, ...).

        Args:
            height: height of the input images.
            width: width of the input images.
            depth: number of channels of the input images.
            classes: number of units in the final softmax layer.

        Returns:
            A `Sequential` model named 'MiniVGGNet'.
        """
        # default to channels-last ordering; switch the input shape and the
        # batch-normalization axis when the backend is configured channels-first
        # (same handling as MiniGoogLeNet.build and ResNet.build)
        inputShape = (height, width, depth)
        channelsDim = -1

        if backend.image_data_format() == 'channels_first':
            inputShape = (depth, height, width)
            channelsDim = 1

        # create the model and name it MiniVGGNet
        model = Sequential(name = 'MiniVGGNet')

        # only the very first layer is told the input shape
        firstLayerArgs = {'input_shape': inputShape}

        for numberOfKernels in (32, 64):
            # two convolutional layers with 3x3 feature maps
            for _ in range(2):
                model.add(Conv2D(numberOfKernels, (3, 3), padding = 'same', **firstLayerArgs))
                model.add(Activation('relu'))
                model.add(BatchNormalization(axis = channelsDim))
                firstLayerArgs = {}

            # 2x2 max pooling layer with stride 2x2
            model.add(MaxPooling2D(pool_size = (2, 2), strides = (2, 2)))
            model.add(Dropout(0.25))

        # flatten the activations from a square to a vector
        model.add(Flatten())

        # fully-connected layer (the activations are 2D here, so the default
        # batch-normalization axis is used)
        model.add(Dense(512))
        model.add(Activation('relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))

        # fully-connected layer with softmax classifier
        model.add(Dense(classes))
        model.add(Activation('softmax'))

        # return the model
        return model
    
class MiniGoogLeNet:
    """Builder for a small GoogLeNet-style network made of inception and downsample modules.

    All methods are static: they take the tensor to extend as their first
    argument and never touch instance state.
    """

    @staticmethod
    def convolution_module(x, K, kX, kY, stride, channelsDim, padding="same"):
        """Apply a CONV -> BN -> RELU sequence to `x`.

        Args:
            x: input tensor.
            K: number of filters of the convolution.
            kX, kY: kernel size of the convolution.
            stride: stride of the convolution.
            channelsDim: axis of the channels dimension (used by batch normalization).
            padding: padding mode passed to `Conv2D`.

        Returns:
            The output tensor of the ReLU activation.
        """
        # create a CONV -> BN -> RELU sequence
        x = Conv2D(K, (kX, kY), strides = stride, padding = padding)(x)
        x = BatchNormalization(axis = channelsDim)(x)
        x = Activation('relu')(x)

        # return the output
        return x

    @staticmethod
    def inception_module(x, numberOf1x1Kernels, numberOf3x3Kernels, channelsDim):
        """Run a 1x1 and a 3x3 convolution module on `x` in parallel and concatenate them.

        Both branches use stride (1, 1) and the default 'same' padding; the
        results are concatenated along `channelsDim`.
        """
        # define two "parallel" convolutions of size 1x1 and 3x3 concatenated across the channels dimension
        convolution_1x1 = MiniGoogLeNet.convolution_module(x, numberOf1x1Kernels, 1, 1, (1, 1), channelsDim)
        convolution_3x3 = MiniGoogLeNet.convolution_module(x, numberOf3x3Kernels, 3, 3, (1, 1), channelsDim)
        x = concatenate([convolution_1x1, convolution_3x3], axis = channelsDim)

        return x

    @staticmethod
    def downsample_module(x, K, channelsDim):
        """Reduce the spatial size of `x` with a strided convolution and a max pooling in parallel.

        The 3x3 convolution (K filters, stride 2, 'valid' padding) and the 3x3
        max pooling (stride 2) are concatenated along `channelsDim`.
        """
        # define a CONV and POOL and then concatenate across the channels dimension
        convolution_3x3 = MiniGoogLeNet.convolution_module(x, K, 3, 3, (2, 2), channelsDim, padding = 'valid')
        pool = MaxPooling2D((3, 3), strides = (2, 2))(x)
        x = concatenate([convolution_3x3, pool], axis = channelsDim)

        return x

    @staticmethod
    def build(width, height, depth, classes):
        """Create the (uncompiled) MiniGoogLeNet model.

        Args:
            width: width of the input images.
            height: height of the input images.
            depth: number of channels of the input images.
            classes: number of units in the final softmax layer.

        Returns:
            A functional `Model` named 'MiniGoogLeNet'.
        """
        # default to channels-last ordering and switch if the backend says otherwise
        inputShape = (height, width, depth)
        channelsDim = -1

        if backend.image_data_format() == "channels_first":
            inputShape = (depth, height, width)
            channelsDim = 1

        # define the model input and first CONV module
        inputs = Input(shape = inputShape)
        x = MiniGoogLeNet.convolution_module(inputs, 96, 3, 3, (1, 1), channelsDim)

        # two inception modules followed by a downsample module
        x = MiniGoogLeNet.inception_module(x, 32, 32, channelsDim)
        x = MiniGoogLeNet.inception_module(x, 32, 48, channelsDim)
        x = MiniGoogLeNet.downsample_module(x, 80, channelsDim)

        # four inception modules followed by a downsample module
        x = MiniGoogLeNet.inception_module(x, 112, 48, channelsDim)
        x = MiniGoogLeNet.inception_module(x, 96, 64, channelsDim)
        x = MiniGoogLeNet.inception_module(x, 80, 80, channelsDim)
        x = MiniGoogLeNet.inception_module(x, 48, 96, channelsDim)
        x = MiniGoogLeNet.downsample_module(x, 96, channelsDim)

        # two inception modules followed by average POOL and dropout
        # NOTE(review): the fixed 7x7 pool presumably assumes 32x32 inputs
        # (32 -> 15 -> 7 after the two downsample modules) -- confirm before
        # using other input sizes
        x = MiniGoogLeNet.inception_module(x, 176, 160, channelsDim)
        x = MiniGoogLeNet.inception_module(x, 176, 160, channelsDim)
        x = AveragePooling2D((7, 7))(x)
        x = Dropout(0.5)(x)

        # softmax classifier
        x = Flatten()(x)
        x = Dense(classes)(x)
        x = Activation('softmax')(x)

        # create a model
        model = Model(inputs, x, name='MiniGoogLeNet')

        # return the model
        return model
    
class ResNet:
    """Builder for a ResNet made of residual modules that apply BN => RELU => CONV three times."""

    @staticmethod
    def residual_module(data, K, stride, channelsDim, reduce = False, reg = 0.0001, bnEpsilon = 0.00002, bnMomentum = 0.9):
        """Append one residual module to `data`.

        The main branch is BN => RELU => CONV repeated three times with a 1x1
        (K/4 filters), a 3x3 (K/4 filters, strided) and a 1x1 (K filters)
        convolution; its output is added to the shortcut branch.

        Args:
            data: input tensor.
            K: number of filters of the final 1x1 convolution.
            stride: stride of the 3x3 convolution (and of the shortcut convolution).
            channelsDim: axis of the channels dimension (used by batch normalization).
            reduce: if True, the shortcut is a strided 1x1 convolution of the
                first activation instead of the unmodified input.
            reg: L2 regularization strength of every convolution kernel.
            bnEpsilon: epsilon of every batch-normalization layer.
            bnMomentum: momentum of every batch-normalization layer.

        Returns:
            The tensor resulting from adding the main branch and the shortcut.
        """
        shortcut = data

        # 1x1 CONVs
        bn1 = BatchNormalization(axis = channelsDim, epsilon = bnEpsilon, momentum = bnMomentum)(data)
        act1 = Activation('relu')(bn1)
        conv1 = Conv2D(int(K * 0.25), (1, 1), use_bias = False, kernel_regularizer = l2(reg))(act1)

        # 3x3 CONVs
        bn2 = BatchNormalization(axis = channelsDim, epsilon = bnEpsilon, momentum = bnMomentum)(conv1)
        act2 = Activation('relu')(bn2)
        conv2 = Conv2D(int(K * 0.25), (3, 3), strides = stride, padding = 'same', use_bias = False, kernel_regularizer = l2(reg))(act2)

        # 1x1 CONVs
        bn3 = BatchNormalization(axis = channelsDim, epsilon = bnEpsilon, momentum = bnMomentum)(conv2)
        act3 = Activation('relu')(bn3)
        conv3 = Conv2D(K, (1, 1), use_bias = False, kernel_regularizer = l2(reg))(act3)

        # if we reduce the spatial size, apply a CONV layer to the shortcut
        if reduce:
            shortcut = Conv2D(K, (1, 1), strides = stride, use_bias = False, kernel_regularizer = l2(reg))(act1)

        # add the shortcut and the final CONV
        x = add([conv3, shortcut])

        return x

    @staticmethod
    def build(width, height, depth, classes, stages, filters, reg = 0.0001, bnEpsilon = 0.00002, bnMomentum = 0.9, dataset='cifar'):
        """Create the (uncompiled) ResNet model.

        Args:
            width: width of the input images.
            height: height of the input images.
            depth: number of channels of the input images.
            classes: number of units in the final softmax layer.
            stages: sequence with the number of residual modules in each stage.
            filters: sequence of filter counts; `filters[0]` is used by the
                initial convolution and `filters[i + 1]` by stage `i`, so it
                needs one more entry than `stages`.
            reg: L2 regularization strength applied to every convolution and
                to the final dense layer.
            bnEpsilon: epsilon of every batch-normalization layer.
            bnMomentum: momentum of every batch-normalization layer.
            dataset: when equal to 'cifar', a single 3x3 convolution is applied
                before the residual stages; any other value skips it.

        Returns:
            A functional `Model` named 'ResNet'.
        """
        # default to channels-last ordering and switch if the backend says otherwise
        inputShape = (height, width, depth)
        channelsDim = -1

        if backend.image_data_format() == 'channels_first':
            inputShape = (depth, height, width)
            channelsDim = 1

        # set the input and apply BN
        inputs = Input(shape = inputShape)
        x = BatchNormalization(axis = channelsDim, epsilon = bnEpsilon, momentum = bnMomentum)(inputs)

        if dataset == 'cifar':
            # apply a single CONV layer
            x = Conv2D(filters[0], (3, 3), use_bias = False, padding = 'same',
                       kernel_regularizer = l2(reg))(x)

        # loop over the number of stages
        for counter, numberOfModules in enumerate(stages):
            # the first stage keeps the spatial size, every later stage halves it
            stride = (1, 1) if counter == 0 else (2, 2)

            # apply a residual module to reduce the spatial dimension of the image volume;
            # `reg` is forwarded explicitly so the caller's value is not silently
            # replaced by the module's default
            x = ResNet.residual_module(x, filters[counter + 1], stride, channelsDim, reduce = True,
                                       reg = reg, bnEpsilon = bnEpsilon, bnMomentum = bnMomentum)

            # loop over the remaining layers in the current stage
            for _ in range(numberOfModules - 1):
                # apply a residual module
                x = ResNet.residual_module(x, filters[counter + 1], (1, 1), channelsDim,
                                           reg = reg, bnEpsilon = bnEpsilon, bnMomentum = bnMomentum)

        # apply BN -> ACT -> POOL
        # NOTE(review): the fixed 8x8 pool presumably assumes 32x32 inputs and
        # three stages -- confirm before using other configurations
        x = BatchNormalization(axis = channelsDim, epsilon = bnEpsilon, momentum = bnMomentum)(x)
        x = Activation('relu')(x)
        x = AveragePooling2D((8, 8))(x)

        # softmax classifier
        x = Flatten()(x)
        x = Dense(classes, kernel_regularizer = l2(reg))(x)
        x = Activation('softmax')(x)

        # create the model
        model = Model(inputs, x, name = 'ResNet')

        # return the model
        return model

## HDF5

If we want to work with huge pre-trained neural nets like VGG19 or other deep CNNs, storing them takes far more space than our RAM is likely to support, so we need to store them on HDDs/SSDs in an efficient way. Keras's model format is pretty large; HDF5 is a good data format for this, but we need some code to interface with that format, which we write below.

In [6]:
class HDF5DataWriter:
    """Buffered writer that stores rows of data and their labels in an HDF5 file.

    Rows are collected in an in-memory buffer and written to disk whenever the
    buffer holds at least `bufferSize` rows. Call `close()` when done so the
    remaining rows are written and the file is closed.
    """

    def __init__(self, dims, outputPath, dataKey = 'images', bufferSize = 1000):
        """Create the HDF5 file and its datasets.

        Args:
            dims: shape of the dataset that stores the rows; `dims[0]` is also
                used as the length of the 'labels' dataset.
            outputPath: path of the HDF5 file to create.
            dataKey: name of the dataset that stores the rows.
            bufferSize: number of buffered rows that triggers a write to disk.

        Raises:
            ValueError: if `outputPath` already exists.
        """
        # check if outputpath exists
        if os.path.exists(outputPath):
            raise ValueError('The supplied `outputPath` already exists and cannot be overwritten. Delete '
                            ' the file manually before continuing.', outputPath)

        # open the HDF5 database for writing and create two datasets: one to store the images/features and
        # one to store the labels
        self.db = h5py.File(outputPath, 'w')
        self.data = self.db.create_dataset(dataKey, dims, dtype = 'float')
        self.labels = self.db.create_dataset('labels', (dims[0],), dtype = 'float')

        # store the buffer size and initialize the buffer and index
        self.bufferSize = bufferSize
        self.buffer = {'data': [], 'labels': []}
        self.index = 0

    def add(self, rows, labels):
        """Buffer `rows` and their `labels`, flushing to disk once the buffer is full."""
        # add the rows and labels to the buffer
        self.buffer['data'].extend(rows)
        self.buffer['labels'].extend(labels)

        # check if the buffer needs to be flushed to disk
        if len(self.buffer['data']) >= self.bufferSize:
            self.flush()

    def flush(self):
        """Write the buffered rows and labels at the current index, then empty the buffer."""
        # write the buffer to disk and reset buffer
        end = self.index + len(self.buffer['data'])
        self.data[self.index:end] = self.buffer['data']
        self.labels[self.index:end] = self.buffer['labels']

        self.index = end
        self.buffer = {'data': [], 'labels': []}

    def storeClassLabels(self, classLabels):
        """Store the class label names in a 'label_names' dataset of variable-length strings."""
        # create a dataset to store class label names, then store them
        # (`str` is the Python 3 text type; `unicode` only exists in Python 2)
        dt = h5py.special_dtype(vlen = str)
        labelSet = self.db.create_dataset('label_names', (len(classLabels),), dtype = dt)

        # hand h5py plain Python strings: it refuses NumPy fixed-width unicode
        # ('U') arrays such as the `classes_` attribute of a LabelEncoder
        labelSet[:] = [str(name) for name in classLabels]

    def close(self):
        """Write any rows still in the buffer and close the HDF5 file."""
        # flush entries to disk if needed
        if len(self.buffer['data']) > 0:
            self.flush()

        # close the dataset
        self.db.close()

## Feature extraction

Let's write some code to extract features from an arbitrary image dataset.

In [8]:
# --- feature-extraction settings ---

# root folder of the input images and path of the HDF5 file to create
dataset = "../datasets/animals/images"
output = "../datasets/animals/hdf5/features.hdf5"

# images per batch and size of the write buffer
batchSize = 32
bufferSize = 1000

In [None]:
# Extract VGG16 features for every image under `dataset` and store them,
# together with the encoded class labels, in the HDF5 file at `output`.
print('Loading images...')
imagePaths = list(paths.list_images(dataset))
random.shuffle(imagePaths)

# extract class labels from the image paths and encode the labels (assumes the
# file paths are in the form 'dataset_name/{class_label}/example.jpg')
labels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

# load the pretrained VGG16 net on the imagenet dataset but without the final
# fully connected layers (include_top is False), so we get features after
# propagating images through the last pooling layer
print('Loading network...')
model = VGG16(weights = 'imagenet', include_top = False)

# length of the flattened feature vector of one image (the 7x7x512 output
# volume of the network for a 224x224 input)
featureDim = 512 * 7 * 7

# initialize the HDF5 dataset writer; it gets its own name so the `dataset`
# path from the configuration cell is not overwritten and this cell can be re-run
writer = HDF5DataWriter((len(imagePaths), featureDim), output,
                        dataKey = 'features', bufferSize = bufferSize)

try:
    # store class label names in a dataset
    writer.storeClassLabels(le.classes_)

    # initialize the progress bar
    widgets = ['Extracting features: ', progressbar.Percentage(), ' ', progressbar.Bar(),
              ' ', progressbar.ETA()]

    pbar = progressbar.ProgressBar(maxval = len(imagePaths), widgets = widgets).start()

    # loop over the images in batches
    for i in np.arange(0, len(imagePaths), batchSize):
        # extract the batch of images/labels and initialize the list of images that will
        # be passed through the net for feature extraction
        batchPaths = imagePaths[i:i + batchSize]
        batchLabels = labels[i:i + batchSize]
        batchImages = []

        # loop over the images in the current batch
        for imagePath in batchPaths:
            # load the input image and resize it to 224x224 pixels
            image = load_img(imagePath, target_size = (224, 224))
            image = img_to_array(image)

            # preprocess by expanding dimensions and subtracting mean RGB pixel
            # intensity from ImageNet
            image = np.expand_dims(image, axis = 0)
            image = imagenet_utils.preprocess_input(image)

            # add the image to the batch
            batchImages.append(image)

        # pass images through net and use outputs as features
        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size = batchSize)

        # flatten each image to a feature vector of the MaxPooling2D outputs
        features = features.reshape((features.shape[0], featureDim))

        # add the features and labels to the HDF5 dataset
        writer.add(features, batchLabels)
        pbar.update(i)

    pbar.finish()
finally:
    # close the dataset once, after ALL batches were processed (closing it
    # inside the loop would make every batch after the first one fail), and
    # also when an error interrupts the extraction so the file handle is released
    writer.close()