In [1]:
import io
import os
import pickle

import tensorflow as tf
from skimage.io import imread

# Re-save each evaluation .bin so that every entry of its first element is a
# plain `bytes` object. Each file is a pickle holding a pair
# (bins, issame_list); presumably `bins` are encoded image buffers - TODO confirm.
test_bin_files = ['/content/drive/MyDrive/faces_emore/agedb_30.bin', '/content/drive/MyDrive/faces_emore/cfp_fp.bin']
print("test_bin_files =", test_bin_files)
for test_bin_file in test_bin_files:
    # NOTE(security): pickle.load can execute arbitrary code - only run this on
    # files from a trusted source.
    with open(test_bin_file, "rb") as ff:
        bins, issame_list = pickle.load(ff, encoding="bytes")

    bb = [bytes(ii) for ii in bins]
    print("Saving to %s" % test_bin_file)
    # Write to a sibling temp file first and swap it in afterwards, so a failed
    # or interrupted dump never truncates the only copy of the data.
    tmp_bin_file = test_bin_file + ".tmp"
    with open(tmp_bin_file, "wb") as ff:
        pickle.dump([bb, issame_list], ff)
    os.replace(tmp_bin_file, test_bin_file)

test_bin_files = ['/content/drive/MyDrive/faces_emore/agedb_30.bin', '/content/drive/MyDrive/faces_emore/cfp_fp.bin']
Saving to /content/drive/MyDrive/faces_emore/agedb_30.bin
Saving to /content/drive/MyDrive/faces_emore/cfp_fp.bin


In [2]:
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras

# Turn on memory growth for every physical GPU that TensorFlow can see.
gpus = tf.config.experimental.list_physical_devices(device_type="GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)

In [3]:
''' get image paths from data folder '''
import glob2
import pickle
# Gather every .jpg two directory levels below the dataset root and shuffle the
# list once; the shuffled order is what gets persisted below.
image_glob = '/content/drive/MyDrive/faces_emore/faces_emore_112x112_folders/faces_emore_112x112_folders/*/*.jpg'
image_names = glob2.glob(image_glob)
image_names = np.random.permutation(image_names).tolist()

# The class id of an image is the (integer) name of the folder that contains it.
image_classes = [
    int(os.path.basename(os.path.dirname(image_path)))
    for image_path in image_names
]

''' Backup to pickle '''
shuffle_backup = {'image_names': image_names, "image_classes": image_classes}
with open('faces_emore_img_class_shuffle.pkl', 'wb') as ff:
    pickle.dump(shuffle_backup, ff)

In [4]:
''' Restore from pickle '''
import pickle
from keras.preprocessing.image import ImageDataGenerator

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Reload the shuffled image paths and class ids from the pickle backup.
# NOTE(security): pickle.load can execute arbitrary code - only load a file
# that this notebook wrote itself.
with open('faces_emore_img_class_shuffle.pkl', 'rb') as ff:
    aa = pickle.load(ff)
image_names, image_classes = aa['image_names'], aa['image_classes']
print(len(image_names), len(image_classes))
# NOTE: the recorded output of this cell shows 152856 entries, split into
# 145214 training / 7642 validation images over 2247 classes.

''' Construct a dataframe feed to ImageDataGenerator '''
data_df = pd.DataFrame({"image_names": image_names, "image_classes": image_classes})
# Labels are handed to flow_from_dataframe as strings.
data_df.image_classes = data_df.image_classes.map(str)

''' ImageDataGenerator flow_from_dataframe '''
batch_size = 128
validation_split = 0.05
# Training images are rescaled to [0, 1] and randomly mirrored.
image_gen = ImageDataGenerator(rescale=1./255, horizontal_flip=True, validation_split=validation_split)
# Validation uses the same rescale and the same split fraction but NO random
# flip, so validation metrics are not computed on augmented images.
val_image_gen = ImageDataGenerator(rescale=1./255, validation_split=validation_split)

# Arguments shared by both subsets; absolute paths are used (directory=None)
# and file existence is not checked (validate_filenames=False).
flow_kwargs = dict(
    directory=None,
    x_col='image_names',
    y_col="image_classes",
    class_mode='categorical',
    target_size=(112, 112),
    batch_size=batch_size,
    validate_filenames=False,
)
train_data_gen = image_gen.flow_from_dataframe(data_df, subset='training', **flow_kwargs)
val_data_gen = val_image_gen.flow_from_dataframe(data_df, subset='validation', **flow_kwargs)

classes = data_df.image_classes.unique().shape[0]
# Keras expects integer step counts; np.ceil alone would return a float.
steps_per_epoch = int(np.ceil(len(train_data_gen.classes) / batch_size))
validation_steps = int(np.ceil(len(val_data_gen.classes) / batch_size))

152856 152856
Found 145214 non-validated image filenames belonging to 2247 classes.
Found 7642 non-validated image filenames belonging to 2247 classes.


In [5]:
''' Convert to tf.data.Dataset '''
# Both generators yield (images, one-hot labels) batches; the batch dimension
# is left as None and the label width equals `classes`.
generator_spec = dict(
    output_types=(tf.float32, tf.int32),
    output_shapes=([None, 112, 112, 3], [None, classes]),
)

train_ds = tf.data.Dataset.from_generator(lambda: train_data_gen, **generator_spec)
# Caching is intentionally left disabled: train_ds = train_ds.cache()
train_ds = train_ds.repeat().prefetch(buffer_size=AUTOTUNE)

val_ds = tf.data.Dataset.from_generator(lambda: val_data_gen, **generator_spec)

# Pull one training batch as a sanity check of the pipeline.
xx, yy = next(iter(train_ds))
print(xx.shape, yy.shape)
# Expected form: (batch_size, 112, 112, 3) (batch_size, classes)

(128, 112, 112, 3) (128, 2247)


In [6]:
from tensorflow.keras import layers

''' Basic model '''
# Backbone: ResNet50V2 with ImageNet weights and without its top layers.
# Alternative backbone kept for reference:
# xx = tf.keras.applications.MobileNetV2(input_shape=(112, 112, 3), include_top=False, weights='imagenet')
xx = tf.keras.applications.ResNet50V2(
    input_shape=(112, 112, 3),
    include_top=False,
    weights='imagenet',
)
xx.trainable = True

# Head: global average pooling -> light dropout -> 128-unit linear embedding.
inputs = xx.inputs[0]
nn = layers.GlobalAveragePooling2D()(xx.outputs[0])
nn = layers.Dropout(0.1)(nn)
embedding = layers.Dense(128, name='embedding')(nn)

basic_model = keras.models.Model(inputs, embedding)
basic_model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 112, 112, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 118, 118, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 56, 56, 64)   9472        ['conv1_pad[0][0]']              
                                                

In [7]:
''' Model definition '''
# Stack a softmax classifier with one unit per class on top of the output of
# `basic_model`, and wrap it as the trainable end-to-end model.
output = layers.Dense(units=classes, activation='softmax')(basic_model.outputs[0])
model = keras.models.Model(inputs=basic_model.inputs[0], outputs=output)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 112, 112, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv1_pad (ZeroPadding2D)      (None, 118, 118, 3)  0           ['input_1[0][0]']                
                                                                                                  
 conv1_conv (Conv2D)            (None, 56, 56, 64)   9472        ['conv1_pad[0][0]']              
                                                                                                  
 pool1_pad (ZeroPadding2D)      (None, 58, 58, 64)   0           ['conv1_conv[0][0]']       

In [None]:
# Train the classifier with Adamax on categorical cross-entropy, tracking accuracy.
model.compile(
    optimizer='adamax',
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

# Step counts are passed explicitly together with the generator-backed datasets.
hist = model.fit(
    train_ds,
    epochs=20,
    verbose=1,
    steps_per_epoch=steps_per_epoch,
    validation_data=val_ds,
    validation_steps=validation_steps,
)

Epoch 1/20
   3/1135 [..............................] - ETA: 24:52:11 - loss: 7.7447 - accuracy: 0.0026