In [1]:
import os
import random

import h5py
import numpy as np
import progressbar
from imutils import paths
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications import VGG16, imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array, load_img

from loader_util.io import HDF5DatasetWriter

In [2]:
# Run configuration: dataset location, output file, batching parameters and
# the seed used to shuffle the image order.
# NOTE(review): 'dataset' is an absolute, machine-specific path; point it at
# the local copy of the images before running.
args = {
    'dataset': r'C:\Users\mhasa\Google Drive\Tutorial Corner\PYTH\DeepLearning\DeepLearning-DL4CV\ImageDatasets\animals\images',
    'output': 'myFeatures_new.hdf5',
    'batch_size': 32,
    'buffer_size': 1000,
    'seed': 42
}
bs = args['batch_size']

# Collect the image paths. Sorting first gives the seeded shuffle below a
# stable starting order regardless of how the directory is enumerated.
imagePaths = sorted(paths.list_images(args['dataset']))
if not imagePaths:
    # fail fast (e.g. wrong dataset path) instead of erroring later on
    raise ValueError('no images found under {!r}'.format(args['dataset']))

# Shuffle with a dedicated, seeded generator so a re-run produces the same
# row order without touching the global `random` state.
random.Random(args['seed']).shuffle(imagePaths)

# extract class labels: the class name is the parent directory of each image
# (.../<class>/<file>); dirname/basename copes with mixed path separators
labels = [os.path.basename(os.path.dirname(p)) for p in imagePaths]
le = LabelEncoder()
labels = le.fit_transform(labels)

In [3]:
# Load VGG16 with ImageNet weights and without its fully-connected top, so
# the network outputs convolutional feature maps rather than predictions.
print('[INFO] loading network....')
model = VGG16(include_top=False, weights="imagenet")

# Initialise the HDF5 dataset writer (one entry per image along the first
# dimension, 512*7*7 flattened feature values along the second), then store
# the class label names in the dataset.
featureShape = (len(imagePaths), 512 * 7 * 7)
dataset = HDF5DatasetWriter(
    dims=featureShape,
    outputPath=args['output'],
    dataKey='features',
    bufSize=args['buffer_size'],
)
dataset.storeClassLabels(le.classes_)

[INFO] loading network....


In [4]:
# initialise the progress bar
numImages = len(imagePaths)
widgets = ["Extracting Features: ", progressbar.Percentage(), " ", 
           progressbar.Bar(), " ", progressbar.ETA()]
pbar = progressbar.ProgressBar(maxval=numImages, widgets=widgets).start()

# loop over the images in batches
for i in range(0, numImages, bs):
    # extract the batch of image paths and labels, then initialise the list
    # of actual images that will be passed through the network for feature
    # extraction
    batchPaths = imagePaths[i: i + bs]
    batchLabels = labels[i: i + bs]
    batchImages = []

    # loop over the image paths in the current batch
    for imagePath in batchPaths:
        # load the input image using the Keras helper utility while ensuring
        # the image is resized to 224x224
        image = load_img(imagePath, target_size=(224, 224))
        image = img_to_array(image)

        # preprocess the image by (1) adding a leading batch dimension and
        # (2) applying the ImageNet preprocessing from imagenet_utils
        image = np.expand_dims(image, axis=0)
        image = imagenet_utils.preprocess_input(image)

        # add image to batch
        batchImages.append(image)

    # pass the images through the network and use the outputs as feature
    # vectors; verbose=0 keeps Keras' own per-call progress output from
    # interleaving with the progress bar above
    batchImages = np.vstack(batchImages)
    features = model.predict(batchImages, batch_size=bs, verbose=0) # type: np.ndarray

    # reshape features so that each image is represented by one flattened
    # vector
    features = features.reshape((features.shape[0], 512*7*7))

    # add features and labels to our HDF5 dataset
    dataset.add(features, batchLabels)

    # report the number of images processed so far (not the batch start
    # index), clamped so the final, possibly shorter, batch ends at 100%
    pbar.update(min(i + bs, numImages))

Extracting Features:  99% |################################### | ETA:   0:00:00

In [5]:
# close the HDF5 dataset writer once all batches have been added
# (presumably this also flushes any rows still buffered -- confirm in
# loader_util.io.HDF5DatasetWriter)
dataset.close()
# mark the progress bar as finished
pbar.finish()

Extracting Features: 100% |####################################| Time:  0:01:12


In [6]:
# now check hdf5 file contents
# Resolve the path from args['output'] so this cell always opens the file the
# notebook just wrote, rather than a hard-coded machine-specific location.
file = os.path.abspath(args['output'])
p = os.path.dirname(file)  # directory of the file; kept in case later cells use it
# Open read-only and state the mode explicitly: the default mode has differed
# between h5py releases, and inspection must not modify the file.
db = h5py.File(file, 'r')

  """


In [7]:
# list the top-level keys stored in the HDF5 file
db.keys()

<KeysViewHDF5 ['features', 'label_names', 'labels']>

In [8]:
# shape of the stored 'features' dataset
db['features'].shape

(3000, 25088)

In [9]:
# shape of the stored 'labels' dataset; its length should match the first
# dimension of 'features'
db['labels'].shape

(3000,)

In [15]:
# read the full 'label_names' dataset into memory to view the class names
db['label_names'][:]

array([b'cats', b'dogs', b'panda'], dtype=object)