In [1]:
# import the necessary packages
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from loader_util.io import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
import random
import os

In [2]:
# Run configuration.
# Image files are expected to be named like path/to/images/cat.0.jpg, i.e. the
# class name is the part of the file name before the first dot.
batch_size, buffer_size = 16, 1000  # images per predict call, writer `bufsize`

# where the training images are read from
dataset_images_path = r"C:\Users\mhasa\Downloads\dogs-vs-cats\train"
# HDF5 file that receives the extracted feature vectors
extracted_features_output_path = r"C:\Users\mhasa\Downloads\dogs-vs-cats\hdf5\resnet_features.hdf5"

In [3]:
# Collect every image file under the training directory. The listing is sorted
# first so the starting order does not depend on filesystem enumeration order.
print("[INFO] loading images......")
image_paths = sorted(paths.list_images(dataset_images_path))

# Shuffle with a private, fixed-seed generator: a fresh "Restart & Run All"
# then yields the same image order (and therefore the same row order when the
# features are written out) without touching the global `random` state.
random.Random(42).shuffle(image_paths)

[INFO] loading images......


In [4]:
# Derive one class name per image from its file name: keep the last path
# component, then everything before the first dot ("cat.0.jpg" -> "cat").
labels = [
    image_file.rpartition(os.path.sep)[2].partition(".")[0]
    for image_file in image_paths
]

# Turn the class names into integer ids and show the classes that were found.
le = LabelEncoder()
encoded_labels = le.fit_transform(labels)
print(le.classes_)

['cat' 'dog']


In [5]:
# Load ResNet50 with ImageNet weights and without its top (classification)
# layers, so that `model.predict` returns feature maps rather than class scores.
print(f"[INFO] loading resnet 50......")
model = ResNet50(
    include_top=False,
    weights="imagenet",
)

[INFO] loading resnet 50......


In [6]:
# Create the HDF5 writer that will receive one flattened feature vector per
# image. 100352 == 7 * 7 * 2048 -- presumably the flattened ResNet50 feature
# map for 224x224 inputs; TODO confirm against the model's actual output shape.
dataset = HDF5DatasetWriter(
    outpath=extracted_features_output_path,
    bufsize=buffer_size,
    dims=(len(image_paths), 7 * 7 * 2048),
)

In [7]:
# sanity check: number of distinct classes found by the label encoder
len(le.classes_)

2

In [8]:
# hand the class names to the writer -- presumably they are stored in the HDF5
# file next to the features; confirm in loader_util.io.HDF5DatasetWriter
dataset.store_string_feature_labels(class_labels=le.classes_)

In [9]:
# Set up and start a progress bar whose maximum is the number of images:
# label, percentage, bar and ETA, separated by single spaces.
widgets = [
    "Extracting Features: ",
    progressbar.Percentage(),
    " ",
    progressbar.Bar(),
    " ",
    progressbar.ETA(),
]
pbar = progressbar.ProgressBar(
    widgets=widgets,
    maxval=len(image_paths),
).start()

Extracting Features:   0% |                                    | ETA:  --:--:--

In [None]:
# Extract ResNet50 features batch by batch and stream them to the HDF5 writer.
# The whole loop runs inside try/finally so the writer is closed even if a
# batch fails part-way through (e.g. an unreadable image file).
try:
    # loop over image paths in batches
    for i in range(0, len(image_paths), batch_size):
        batch_paths = image_paths[i:i + batch_size]
        batch_labels = encoded_labels[i:i + batch_size]
        batch_images = []

        for image_path in batch_paths:
            # load at 224x224, convert to an array, add a leading batch axis
            # and apply the ImageNet preprocessing
            image = load_img(image_path, target_size=(224, 224))
            image = img_to_array(image)
            image = np.expand_dims(image, axis=0)
            image = imagenet_utils.preprocess_input(image)
            batch_images.append(image)

        # pass the preprocessed batch thru resnet, then flatten each sample's
        # output into a single feature vector
        batch_images = np.vstack(batch_images)
        resnet_features = model.predict(batch_images, batch_size=batch_size)
        resnet_features = resnet_features.reshape((resnet_features.shape[0],
                                                   -1))

        # add to the hdf5 dataset
        dataset.add(resnet_features, batch_labels)

        # report the number of images processed so far (the last batch may be
        # shorter than batch_size, so count the paths actually handled)
        pbar.update(i + len(batch_paths))
finally:
    # close the db -- always, so the writer is not left open after an error
    dataset.close()

# only reached when every batch succeeded
pbar.finish()

Extracting Features:  20% |#######                             | ETA:   1:06:23