## Train the CNN model for age and gender estimation

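Note: this notebook is adapted from `train.py`; the script's command-line arguments are replaced by plain variables (see "Arguments: default values" below).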

In [1]:
import pandas as pd
import logging
import argparse
from pathlib import Path
import numpy as np
from keras.callbacks import LearningRateScheduler, ModelCheckpoint
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from wide_resnet import WideResNet
from utils import load_data
from keras.preprocessing.image import ImageDataGenerator
from mixup_generator import MixupGenerator
from random_eraser import get_random_eraser

logging.basicConfig(level=logging.DEBUG)

import os

Using TensorFlow backend.


In [2]:
# Select GPU device: expose only CUDA device 0 (the first GPU) to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
class Schedule:
    """Step-wise learning-rate schedule, callable with an epoch index.

    The run is divided into four quarters. The first quarter uses the
    initial learning rate unchanged; the following quarters scale it by
    0.2, 0.04 and 0.008 respectively.
    """

    def __init__(self, nb_epochs, initial_lr):
        """Store the total number of epochs and the starting learning rate."""
        self.epochs = nb_epochs
        self.initial_lr = initial_lr

    def __call__(self, epoch_idx):
        """Return the learning rate to use for epoch ``epoch_idx``."""
        base_lr = self.initial_lr
        total_epochs = self.epochs

        # First quarter: hand back the initial rate untouched (no multiply).
        if epoch_idx < total_epochs * 0.25:
            return base_lr

        # Second and third quarters: (upper boundary fraction, scale factor).
        for boundary, factor in ((0.50, 0.2), (0.75, 0.04)):
            if epoch_idx < total_epochs * boundary:
                return base_lr * factor

        # Final quarter (and anything beyond the configured epoch count).
        return base_lr * 0.008


def get_optimizer(opt_name, lr):
    """Build the Keras optimizer selected by ``opt_name``.

    Args:
        opt_name: ``"sgd"`` (SGD with momentum 0.9 and Nesterov updates)
            or ``"adam"``.
        lr: learning rate handed to the optimizer.

    Returns:
        The constructed optimizer instance.

    Raises:
        ValueError: if ``opt_name`` is neither ``"sgd"`` nor ``"adam"``.
    """
    if opt_name == "sgd":
        return SGD(lr=lr, momentum=0.9, nesterov=True)
    if opt_name == "adam":
        return Adam(lr=lr)
    raise ValueError("optimizer name should be 'sgd' or 'adam'")

### Arguments: default values

In [4]:
# --- Data and output locations ---------------------------------------------
# Path to the input database .mat file.
args_input = "data/imdb_db.mat"
# Checkpoint directory name.
args_output_path = "checkpoints"

# --- Optimisation -----------------------------------------------------------
# Batch size.
args_batch_size = 32
# Number of training epochs.
args_nb_epochs = 30
# Initial learning rate.
args_lr = 0.1
# Optimizer name: 'sgd' or 'adam'.
args_opt = "sgd"

# --- Network architecture ---------------------------------------------------
# Depth of the network (should be 10, 16, 22, 28, ...).
args_depth = 16
# Width of the network.
args_width = 8

# --- Validation and augmentation --------------------------------------------
# Fraction of the data held out for validation.
args_validation_split = 0.1
# Use data augmentation when True.
args_aug = True

In [10]:
# Bind the notebook defaults to the short names used by the rest of the
# notebook (this stands in for the script's get_args() call).
input_path, batch_size, nb_epochs = args_input, args_batch_size, args_nb_epochs
lr, opt_name = args_lr, args_opt
depth, k = args_depth, args_width
validation_split, use_augmentation = args_validation_split, args_aug

# Checkpoints go to <parent of the current working directory>/<args_output_path>;
# the directory is created if it does not exist yet.
current_nb_path = os.getcwd()
output_path = Path(current_nb_path).resolve().parent / args_output_path
output_path.mkdir(parents=True, exist_ok=True)

In [11]:
logging.debug("Loading data...")

# load_data yields six values; the fourth and sixth are not needed here.
image, gender, age, _, image_size, _ = load_data(input_path)
X_data = image

# One-hot encode both targets: 2 classes for gender, 101 classes for age.
y_data_g = np_utils.to_categorical(gender, num_classes=2)
y_data_a = np_utils.to_categorical(age, num_classes=101)

DEBUG:root:Loading data...


In [12]:
# WideResNet(...) produces a builder object; calling it yields the model.
model_builder = WideResNet(image_size, depth=depth, k=k)
model = model_builder()

# The same categorical cross-entropy loss is listed twice, once per entry
# of the loss list passed to compile().
opt = get_optimizer(opt_name, lr)
losses = ["categorical_crossentropy"] * 2
model.compile(optimizer=opt, loss=losses, metrics=['accuracy'])

logging.debug("Model summary...")
model.count_params()
model.summary()

# Callback 1: step-wise learning-rate decay driven by Schedule.
lr_callback = LearningRateScheduler(schedule=Schedule(nb_epochs, lr))

# Callback 2: write weights to output_path whenever val_loss improves
# (save_best_only=True); the file name embeds the epoch and val_loss.
checkpoint_callback = ModelCheckpoint(
    str(output_path) + "/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
    monitor="val_loss",
    verbose=1,
    save_best_only=True,
    mode="auto",
)

callbacks = [lr_callback, checkpoint_callback]

DEBUG:root:image_dim_ordering = 'tf'
DEBUG:root:Creating model...
DEBUG:root:Model summary...


Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 64, 64, 16)   432         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 64, 64, 16)   64          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 64, 64, 16)   0           batch_normalization_1[0][0]      
____________________________________________________________________________________________

In [None]:
logging.debug("Running training...")

# Shuffle every array with one shared index permutation so that images and
# both label arrays stay aligned.
data_num = len(X_data)
indexes = np.arange(data_num)
np.random.shuffle(indexes)
X_data = X_data[indexes]
y_data_g = y_data_g[indexes]
y_data_a = y_data_a[indexes]

# The leading (1 - validation_split) share is used for training; the tail
# is held out and passed to Keras as validation data.
train_num = int(data_num * (1 - validation_split))
X_train, X_test = X_data[:train_num], X_data[train_num:]
y_train_g, y_test_g = y_data_g[:train_num], y_data_g[train_num:]
y_train_a, y_test_a = y_data_a[:train_num], y_data_a[train_num:]

train_targets = [y_train_g, y_train_a]
held_out = (X_test, [y_test_g, y_test_a])

if not use_augmentation:
    # Plain training on the in-memory arrays.
    hist = model.fit(X_train, train_targets,
                     batch_size=batch_size,
                     epochs=nb_epochs,
                     callbacks=callbacks,
                     validation_data=held_out)
else:
    # Augmented training: shifts, horizontal flips and the random-eraser
    # preprocessing function are applied by ImageDataGenerator, which is
    # handed to MixupGenerator to produce the training batches.
    datagen = ImageDataGenerator(
        width_shift_range=0.1,
        height_shift_range=0.1,
        horizontal_flip=True,
        preprocessing_function=get_random_eraser(v_l=0, v_h=255))
    mixup = MixupGenerator(X_train, train_targets,
                           batch_size=batch_size,
                           alpha=0.2,
                           datagen=datagen)
    training_generator = mixup()
    hist = model.fit_generator(generator=training_generator,
                               steps_per_epoch=train_num // batch_size,
                               validation_data=held_out,
                               epochs=nb_epochs,
                               verbose=1,
                               callbacks=callbacks)

# Persist the per-epoch metrics under the key "history"; the file name
# records the network depth and width.
logging.debug("Saving history...")
history_file = output_path.joinpath("history_{}_{}.h5".format(depth, k))
pd.DataFrame(hist.history).to_hdf(history_file, "history")

DEBUG:root:Running training...
