In [None]:
import sys
from google.colab import drive

# Mount Google Drive into the Colab filesystem.
drive.mount('/content/drive')

# Put the Drive root on the import path so modules stored there can be imported.
# The membership check keeps repeated runs of this cell from stacking duplicate
# entries on sys.path.
drive_root = r"/content/drive/MyDrive/"
if drive_root not in sys.path:
    sys.path.append(drive_root)

In [1]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import imagenet_utils
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from sklearn.preprocessing import LabelEncoder
from loader_util.io import HDF5DatasetWriter
from imutils import paths
from tqdm import tqdm_notebook
import numpy as np
import random
import os

In [14]:
# script constants
# NOTE(review): both paths below are absolute Windows locations, while the first
# cell mounts Google Drive in Colab — confirm which environment this notebook
# is meant to run in and adjust the paths accordingly.
# Directory that is scanned for input images.
dataset_path = r"C:\GoogleDrive\GitHub\image_datasets\kaggle_dogs_vs_cats\train"
# HDF5 file that receives the extracted features.
extracted_features_path = r"D:\DL4CV_Data\practitioner_bundle\chap10\cats_dogs_extracted_feat_resnet50.hdf5"
# Number of images per extraction batch (used for slicing and for model.predict).
batch_size = 16
# Passed to HDF5DatasetWriter as `bufsize`.
buffer_size = 1000

In [3]:
# Collect every image path under dataset_path and build one integer label per path.
print("[INFO] loading images......")

# Sort first so the starting order does not depend on how the filesystem lists
# files, then shuffle with a dedicated seeded generator. This makes the row order
# written to the feature file reproducible across runs without touching the
# global `random` state.
shuffle_seed = 42
image_paths = sorted(paths.list_images(dataset_path))
random.Random(shuffle_seed).shuffle(image_paths)

# The class name is the part of the file name before the first ".".
# Labels are derived after the shuffle, so they stay aligned with image_paths.
labels = [os.path.basename(pth).split(".")[0] for pth in image_paths]
le = LabelEncoder()
labels = le.fit_transform(labels)
labels

[INFO] loading images......


array([1, 1, 1, ..., 1, 1, 0], dtype=int64)

In [4]:
# Display the class names learned by the label encoder.
le.classes_

array(['cat', 'dog'], dtype='<U3')

In [5]:
# Build ResNet50 with ImageNet weights and without its top classification
# layers (include_top=False), for 224x224 RGB inputs.
print("[INFO] loading the network......")
model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

[INFO] loading the network......


In [15]:
# Size of one flattened feature vector, taken from the model's own output shape
# (batch axis dropped) instead of a hand-maintained constant. For the ResNet50
# configuration loaded above this is 7 * 7 * 2048 = 100352, and it stays correct
# if the backbone or input size is changed.
feature_dim = int(np.prod(model.output_shape[1:]))

# One row per image, feature_dim columns; the writer buffers `buffer_size` rows.
dataset = HDF5DatasetWriter(dims=(len(image_paths), feature_dim),
                            outpath=extracted_features_path,
                            datakey="features",
                            bufsize=buffer_size)
# Store the class names alongside the features.
dataset.store_string_feature_labels(le.classes_)

In [None]:

# Run every image through the network in batches and write the flattened
# features, together with their labels, to the HDF5 writer.
# The try/finally guarantees the writer is closed even if an image fails to load
# or prediction raises, so the output file is not left open.
try:
    for start in range(0, len(image_paths), batch_size):
        batch_paths = image_paths[start:start + batch_size]
        batch_labels = labels[start:start + batch_size]

        # Load each image at the network's input size and stack the batch into
        # one (n, 224, 224, 3) array.
        batch_images = np.stack([
            img_to_array(load_img(image_path, target_size=(224, 224)))
            for image_path in batch_paths
        ])
        # Apply the ImageNet preprocessing to the whole batch at once.
        batch_images = imagenet_utils.preprocess_input(batch_images)

        features = model.predict(batch_images, batch_size=batch_size)
        # Flatten each sample's feature map into a single row.
        features = features.reshape((features.shape[0], -1))
        dataset.add(features, batch_labels)
finally:
    dataset.close()

