In [None]:
# import packages
# NOTE: the Keras pretrained-model package is `applications` (plural);
# the singular spelling does not exist and fails at import time.
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from pyimagesearch.io import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
import random
import os

In [None]:
# run configuration read by the cells below
args = dict(
    dataset='',        # handed to paths.list_images to locate the input images
    output='',         # handed to HDF5DatasetWriter as its output location
    batch_size=32,     # number of images pushed through the network per step
    buffer_size=1000,  # forwarded to HDF5DatasetWriter as bufSize
)

In [None]:
# keep the batch size in a short convenience variable
bs = args['batch_size']

# gather the paths of all images we will be describing, then shuffle
# them in place so that training splits can later be taken by plain
# array slicing
print("[INFO] loading images...")
imagePaths = list(paths.list_images(args['dataset']))
random.shuffle(imagePaths)

# the class name of an image is the second-to-last component of its
# path; collect the names, then encode them with a LabelEncoder
classNames = [path.split(os.path.sep)[-2] for path in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(classNames)

In [None]:
# load the VGG16 network with ImageNet weights and without its top
# (fully-connected) layers, so it can be used as a feature extractor
print("[INFO] loading VGG16 network..")
model = VGG16(weights='imagenet', include_top=False)

# initialize the HDF5 dataset writer with one row per image and
# 512 * 7 * 7 columns (the flattened feature size used by the
# extraction loop), then store the class label names in the dataset
dataset = HDF5DatasetWriter(
    (len(imagePaths), 512 * 7 * 7),
    args['output'],
    dataKey='features',
    bufSize=args['buffer_size'],
)
# a fitted LabelEncoder exposes its class names as `classes_`;
# it has no `classLabels` attribute
dataset.storeClassLabels(le.classes_)

In [None]:
# initialize the progress bar
widgets = ["Extracting features: ", progressbar.Percentage(), " ", progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=len(imagePaths), widgets=widgets).start()

try:
    # loop over the images in batches
    for i in np.arange(0, len(imagePaths), bs):
        # extract the batch of image paths and labels, then initialize
        # the list of actual images that will be passed through the
        # network for feature extraction
        batchPaths = imagePaths[i:i + bs]
        batchLabels = labels[i:i + bs]
        batchImages = []

        # loop over the image paths in the current batch; the loop
        # variable must NOT be named `imagePaths`, otherwise it rebinds
        # the full list of paths to a single string and every batch
        # after the first one slices characters instead of paths
        for imagePath in batchPaths:
            # load the input image using the Keras helper utility
            # while ensuring the image is resized to 224 x 224
            image = load_img(imagePath, target_size=(224, 224))
            image = img_to_array(image)

            # preprocess the image by (1) adding a leading batch
            # dimension and (2) applying the ImageNet preprocessing
            # from imagenet_utils
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)

            # add the image to the batch
            batchImages.append(image)

        # pass the images through the network and use the outputs
        # as our actual features
        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size=bs)

        # reshape the features so that each image is represented by
        # a flattened feature vector of length 512 * 7 * 7
        features = features.reshape((features.shape[0], 512 * 7 * 7))

        # add the features and labels to our HDF5 dataset
        dataset.add(features, batchLabels)
        pbar.update(i)
finally:
    # always close the dataset, even if a batch fails, so the writer
    # is not left open
    dataset.close()
pbar.finish()