In [1]:
import numpy as np 
import pandas as pd
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
dataset_folder_name = 'UTKFace/UTKFace'
TRAIN_TEST_SPLIT = 0.9
IM_WIDTH = IM_HEIGHT = 100
dataset_dict = {
    'race_id': {
        0: 'white', 
        1: 'black', 
        2: 'asian', 
        3: 'indian', 
        4: 'others'
    },
    'gender_id': {
        0: 'male',
        1: 'female'
    }
}
dataset_dict['gender_alias'] = dict((g, i) for i, g in dataset_dict['gender_id'].items())
dataset_dict['race_alias'] = dict((r, i) for i, r in dataset_dict['race_id'].items())

In [2]:
def parse_dataset(dataset_path, ext='jpg'):
    """
    Used to extract information about our dataset. It does iterate over all images and return a DataFrame with
    the data (age, gender and sex) of all files.
    """
    def parse_info_from_file(path):
        """
        Parse information from a single file
        """
        try:
            filename = os.path.split(path)[1]
            filename = os.path.splitext(filename)[0]
            age, gender, race, _ = filename.split('_')
            return int(age), dataset_dict['gender_id'][int(gender)], dataset_dict['race_id'][int(race)]
        except Exception as ex:
            return None, None, None
        
    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))
    
    records = []
    for file in files:
        info = parse_info_from_file(file)
        records.append(info)
        
    df = pd.DataFrame(records)
    df['file'] = files
    df.columns = ['age', 'gender', 'race', 'file']
    df = df.dropna()
    
    return df
df = parse_dataset(dataset_folder_name)
df.head()

Unnamed: 0,age,gender,race,file
0,100.0,male,white,UTKFace/UTKFace\100_0_0_20170112213500903.jpg....
1,100.0,male,white,UTKFace/UTKFace\100_0_0_20170112215240346.jpg....
2,100.0,female,white,UTKFace/UTKFace\100_1_0_20170110183726390.jpg....
3,100.0,female,white,UTKFace/UTKFace\100_1_0_20170112213001988.jpg....
4,100.0,female,white,UTKFace/UTKFace\100_1_0_20170112213303693.jpg....


In [3]:
from keras.utils.np_utils import to_categorical
from PIL import Image
class UtkFaceDataGenerator():
    """
    Data generator for the UTKFace dataset. This class should be used when training our Keras multi-output model.
    """
    def __init__(self, df):
        self.df = df
        
    def generate_split_indexes(self):
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * TRAIN_TEST_SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]
        train_up_to = int(train_up_to * TRAIN_TEST_SPLIT)
        train_idx, valid_idx = train_idx[:train_up_to], train_idx[train_up_to:]
        
        # converts alias to id
        self.df['gender_id'] = self.df['gender'].map(lambda gender: dataset_dict['gender_alias'][gender])
        self.df['race_id'] = self.df['race'].map(lambda race: dataset_dict['race_alias'][race])
        self.max_age = self.df['age'].max()
        
        return train_idx, valid_idx, test_idx
    
    def preprocess_image(self, img_path):
        """
        Used to perform some minor preprocessing on the image before inputting into the network.
        """
        im = Image.open(img_path)
        im = im.resize((IM_WIDTH, IM_HEIGHT))
        im = np.array(im) / 255.0
        
        return im
        
    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Used to generate a batch with images when training/testing/validating our Keras model.
        """
        
        # arrays to store our batched data
        images, ages, races, genders = [], [], [], []
        while True:
            for idx in image_idx:
                person = self.df.iloc[idx]
                
                age = person['age']
                race = person['race_id']
                gender = person['gender_id']
                file = person['file']
                
                im = self.preprocess_image(file)
                
                ages.append(age / self.max_age)
                races.append(to_categorical(race, len(dataset_dict['race_id'])))
                genders.append(to_categorical(gender, len(dataset_dict['gender_id'])))
                images.append(im)
                
                # yielding condition
                if len(images) >= batch_size:
                    yield np.array(images), [np.array(ages), np.array(races), np.array(genders)]
                    images, ages, races, genders = [], [], [], []
                    
            if not is_training:
                break
                
data_generator = UtkFaceDataGenerator(df)
train_idx, valid_idx, test_idx = data_generator.generate_split_indexes() 

In [4]:
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
import tensorflow as tf
class UtkMultiOutputModel():
    """
    Used to generate our multi-output model. This CNN contains three branches, one for age, other for 
    sex and another for race. Each branch contains a sequence of Convolutional Layers that is defined
    on the make_default_hidden_layers method.
    """
    def make_default_hidden_layers(self, inputs):
        """
        Used to generate a default set of hidden layers. The structure used in this network is defined as:
        
        Conv2D -> BatchNormalization -> Pooling -> Dropout
        """
        x = Conv2D(16, (3, 3), padding="same")(inputs)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)
        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)
        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)
        return x
    def build_race_branch(self, inputs, num_races):
        """
        Used to build the race branch of our face recognition network.
        This branch is composed of three Conv -> BN -> Pool -> Dropout blocks, 
        followed by the Dense output layer.
        """
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(64)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_races)(x)
        x = Activation("softmax", name="race_output")(x)
        return x
    def build_gender_branch(self, inputs, num_genders=2):
        """
        Used to build the gender branch of our face recognition network.
        This branch is composed of three Conv -> BN -> Pool -> Dropout blocks, 
        followed by the Dense output layer.
        """
        x = Lambda(lambda c: tf.image.rgb_to_grayscale(c))(inputs)
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(2)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_genders)(x)
        x = Activation("sigmoid", name="gender_output")(x)
        return x
    def build_age_branch(self, inputs):   
        """
        Used to build the age branch of our face recognition network.
        This branch is composed of three Conv -> BN -> Pool -> Dropout blocks, 
        followed by the Dense output layer.
        """
        x = self.make_default_hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(32)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1)(x)
        x = Activation("linear", name="age_output")(x)
        return x
    def assemble_full_model(self, width, height, num_races):
        """
        Used to assemble our multi-output model CNN.
        """
        input_shape = (height, width, 3)
        inputs = Input(shape=input_shape)
        age_branch = self.build_age_branch(inputs)
        race_branch = self.build_race_branch(inputs, num_races)
        gender_branch = self.build_gender_branch(inputs)
        model = Model(inputs=inputs,
                     outputs = [age_branch, race_branch, gender_branch],
                     name="face_net")
        return model
    
model = UtkMultiOutputModel().assemble_full_model(IM_WIDTH, IM_HEIGHT, num_races=len(dataset_dict['race_alias']))

In [5]:
foptimizer_function = 'adam'
model.compile(optimizer=foptimizer_function, 
              loss={
                  'age_output': 'mse', 
                  'race_output': 'categorical_crossentropy', 
                  'gender_output': 'binary_crossentropy'},
              loss_weights={
                  'age_output': 4., 
                  'race_output': 1.5, 
                  'gender_output': 0.1},
              metrics={
                  'age_output': 'mae', 
                  'race_output': 'accuracy',
                  'gender_output': 'accuracy'})

In [6]:
from keras.callbacks import ModelCheckpoint
batch_size = 32
valid_batch_size = 32
epochs = 64
train_gen = data_generator.generate_images(train_idx, is_training=True, batch_size=batch_size)
valid_gen = data_generator.generate_images(valid_idx, is_training=True, batch_size=valid_batch_size)
callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]
history = model.fit_generator(train_gen,
                    steps_per_epoch=len(train_idx)//batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valid_idx)//valid_batch_size)



Epoch 1/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 2/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 3/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 4/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 5/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 6/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 7/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 8/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 9/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 10/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 11/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 12/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 13/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 14/64
INFO:tensorflow:Assets written to: .\model_check

INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 17/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 18/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 19/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 20/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 21/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 22/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 23/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 24/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 25/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 26/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 27/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 28/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 29/64
INFO:tensorflow:Assets written to: .\model_checkpoi

INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 32/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 33/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 34/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 35/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 36/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 37/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 38/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 39/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 40/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 41/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 42/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 43/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 44/64
INFO:tensorflow:Assets written to: .\model_checkpoi

INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 47/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 48/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 49/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 50/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 51/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 52/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 53/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 54/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 55/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 56/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 57/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 58/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 59/64
INFO:tensorflow:Assets written to: .\model_checkpoi

INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 62/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 63/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets
Epoch 64/64
INFO:tensorflow:Assets written to: .\model_checkpoint\assets


In [8]:
plt.clf()
fig = go.Figure()
fig.add_trace(go.Scatter(
                    y=history.history['race_output_acc'],
                    name='Train'))
fig.add_trace(go.Scatter(
                    y=history.history['val_race_output_acc'],
                    name='Valid'))
fig.update_layout(height=500, 
                  width=700,
                  title='Accuracy for race feature',
                  xaxis_title='Epoch',
                  yaxis_title='Accuracy')
fig.show()

NameError: name 'go' is not defined

<Figure size 432x288 with 0 Axes>