**Preparing our train and test data**

In [1]:
import glob
import os
import random
from random import shuffle
shuffle_data = True  # shuffle the addresses before splitting
shuffle_seed = 42  # fixed seed so the train/test split is the same on every run
train_fraction = 0.8  # share of the images assigned to the training set
hdf5_path_train = 'cats_dogs_64_train.hdf5'  # file path for the created train .hdf5 file
hdf5_path_test = 'cats_dogs_64_test.hdf5'  # file path for the created test .hdf5 file
cat_dog_train_path = 'images/*.jpg'  # glob pattern matching the original images

# Get all the image paths. glob returns them in an arbitrary, OS-dependent
# order, so sort them first; otherwise the seeded shuffle below would still
# produce a different split from machine to machine.
addrs = sorted(glob.glob(cat_dog_train_path))
if not addrs:
    # Fail early with a clear message instead of a cryptic unpacking error
    # in the shuffle step below.
    raise ValueError('No images matched the pattern %r' % cat_dog_train_path)

# Label the data as 1=cat, 0=dog (any file without 'cat' in its name).
# Only the file name is inspected, so a directory whose name happens to
# contain "cat" cannot turn every label into 1.
labels = [1 if 'cat' in os.path.basename(addr) else 0 for addr in addrs]

# Shuffle paths and labels together so each path keeps its own label.
if shuffle_data:
    paired = list(zip(addrs, labels))  # bind every path to its label
    # A private Random instance keeps the shuffle reproducible without
    # touching the global random state.
    random.Random(shuffle_seed).shuffle(paired)
    # zip(*paired) un-pairs the tuples again: "addrs" holds the shuffled
    # paths and "labels" holds the labels in the same shuffled order.
    (addrs, labels) = zip(*paired)

# Divide the data: the first `train_fraction` (80%) for train, the rest for test.
split_idx = int(train_fraction * len(addrs))

train_addrs = addrs[:split_idx]
train_labels = labels[:split_idx]

test_addrs = addrs[split_idx:]
test_labels = labels[split_idx:]


**Creating the HDF5 files and datasets for the train and test data**

In [2]:
import numpy as np
import h5py

# Number of samples in each split; every image is stored as 64x64 with 3 channels.
n_train = len(train_addrs)
n_test = len(test_addrs)
train_shape = (n_train, 64, 64, 3)
test_shape = (n_test, 64, 64, 3)

# Open (and truncate, because of mode='w') one HDF5 file per split.
# Both handles stay open so the images can be written into them afterwards.
f_train = h5py.File(hdf5_path_train, mode='w')
f_test = h5py.File(hdf5_path_test, mode='w')

# --- train split ---
# Pixels are stored as unsigned 8-bit integers.
f_train.create_dataset("train_img", train_shape, np.uint8)

# Datasets are addressed by name on the file object, like keys of a dictionary;
# create_dataset also returns the new dataset, which is filled with the labels here.
train_label_ds = f_train.create_dataset("train_labels", (n_train,), np.uint8)
train_label_ds[...] = train_labels

# --- test split ---
# Same layout as the train file, using the "test_" dataset names.
f_test.create_dataset("test_img", test_shape, np.uint8)

test_label_ds = f_test.create_dataset("test_labels", (n_test,), np.uint8)
test_label_ds[...] = test_labels

  from ._conv import register_converters as _register_converters


**Writing images for the train and test data**

In [3]:
import cv2

def _write_images(dataset, paths):
    """Load, resize and store every image in `paths` into `dataset`.

    Parameters
    ----------
    dataset : HDF5 dataset indexed as (sample, height, width, channel)
        Destination for the pixels; image `k` is written to `dataset[k, ...]`.
        The target image size is taken from `dataset.shape[1:3]`.
    paths : sequence of str
        Image file paths, in the order they should be stored.

    Raises
    ------
    IOError
        If an image cannot be read (cv2.imread returned None).
    """
    height, width = dataset.shape[1:3]
    for idx, path in enumerate(paths):
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals a missing/corrupt file by returning None
            # rather than raising, so report the offending path explicitly.
            raise IOError('Could not read image: %s' % path)
        # cv2.resize expects the target size as (width, height).
        img = cv2.resize(img, (width, height), interpolation=cv2.INTER_CUBIC)
        # cv2 loads images as BGR; convert to RGB before storing.
        dataset[idx, ...] = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


try:
    _write_images(f_train["train_img"], train_addrs)
    _write_images(f_test["test_img"], test_addrs)
finally:
    # Always release both file handles, even if an image failed to load,
    # so the .hdf5 files are not left open in the kernel.
    f_train.close()
    f_test.close()