In [1]:
import pandas as pd
import numpy as np   
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from sklearn.metrics import f1_score

import keras, math
import tensorflow as tf
from keras_preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Sequential, Model, load_model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler

## Preprocessing Functions for Adience Dataset

In [2]:
def create_path(df, basepath):
    """
    this function creates column of image file path for each image
    """
    df['path'] = df.apply(lambda x: f"{basepath}{x['user_id']}/landmark_aligned_face.{x['face_id']}.{x['original_image']}", axis=1)
    return df

def filter_df(df):
    """
    this function removes images with unknown gender from the dataset
    """
    df['valid'] = df['gender'].apply(lambda x: int(x in ['f', 'm']))
    df = df[df['valid'] == 1]
    return df.reset_index(drop=True)

def flow_ready(df):
    """
    this function will make a dataframe ready for flow with cols: [image_path, category]
    """
    df = pd.concat([df['path'],df['gender']],axis=1)
    return df

## Loading Dataset

In [3]:
data_folder = "../input/adience-age-gender-prediction-aligned-faces/age-gender-data/"
img_folder = "../input/adience-age-gender-prediction-aligned-faces/age-gender-data/aligned/"

fold0_df = pd.read_csv(f"{data_folder}fold_{0}_data.txt", sep="\t")
fold1_df = pd.read_csv(f"{data_folder}fold_{1}_data.txt", sep="\t")
fold2_df = pd.read_csv(f"{data_folder}fold_{2}_data.txt", sep="\t")
fold3_df = pd.read_csv(f"{data_folder}fold_{3}_data.txt", sep="\t")
fold4_df = pd.read_csv(f"{data_folder}fold_{4}_data.txt", sep="\t")

# remove unknown genders
fold0_df = filter_df(fold0_df)
fold1_df = filter_df(fold1_df)
fold2_df = filter_df(fold2_df)
fold3_df = filter_df(fold3_df)
fold4_df = filter_df(fold4_df)

# create image file path for each image in dataset
# create image_path, label dataframe for image generator
fold0_df = flow_ready(create_path(fold0_df, img_folder))
fold1_df = flow_ready(create_path(fold1_df, img_folder))
fold2_df = flow_ready(create_path(fold2_df, img_folder))
fold3_df = flow_ready(create_path(fold3_df, img_folder))
fold4_df = flow_ready(create_path(fold4_df, img_folder))

# initial train test set
train_df = pd.concat([fold0_df, fold1_df, fold2_df, fold3_df])
test_df = fold4_df

train_df.head()

Unnamed: 0,path,gender
0,../input/adience-age-gender-prediction-aligned...,f
1,../input/adience-age-gender-prediction-aligned...,m
2,../input/adience-age-gender-prediction-aligned...,f
3,../input/adience-age-gender-prediction-aligned...,m
4,../input/adience-age-gender-prediction-aligned...,m


## Data Augmentation

In [4]:
class CutOutDataGenerator(ImageDataGenerator):
    def __init__(self,
                 cutout_size=None,
                 n_squares=None,
                 **kwargs):
        '''
        Custom image data generator for cutout regularization.
        Behaves like ImageDataGenerator, but allows color augmentation.
        '''
        super().__init__(
            preprocessing_function=self.augment_cutout,
            **kwargs)

        self.cutout_size = cutout_size
        self.n_squares = n_squares
    
    def augment_cutout(self, image):
        '''Takes an input image and returns a cutout version of it'''
        h, w, channels = image.shape
        new_image = image
        for _ in range(self.n_squares):
            y = tf.random.uniform([1], minval=0, maxval=h, dtype=tf.int32).numpy()[0]
            x = tf.random.uniform([1], minval=0, maxval=w, dtype=tf.int32).numpy()[0]
            y1 = tf.clip_by_value(y - self.cutout_size // 2, 0, h).numpy()
            y2 = tf.clip_by_value(y + self.cutout_size // 2, 0, h).numpy()
            x1 = tf.clip_by_value(x - self.cutout_size // 2, 0, w).numpy()
            x2 = tf.clip_by_value(x + self.cutout_size // 2, 0, w).numpy()
            new_image[y1:y2,x1:x2,:] = 0
        return new_image

In [5]:
# based on pre-trained base
image_size = (218, 178)
batch_size = 32

In [6]:
train_datagen = CutOutDataGenerator(rotation_range = 6,
                                   width_shift_range = 0.2,
                                   height_shift_range = 0.2,
                                   rescale = 1./255.,
                                   horizontal_flip = True,
                                   cutout_size=40, n_squares=1)

val_datagen = ImageDataGenerator(rescale = 1./255.)

In [7]:
train_generator = train_datagen.flow_from_dataframe(dataframe=train_df,
                                            x_col=train_df.columns[0],
                                            y_col=train_df.columns[1],
                                            batch_size=batch_size,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="categorical",
                                            target_size=image_size,
                                            color_mode='rgb')

val_generator = val_datagen.flow_from_dataframe(dataframe=test_df,
                                            x_col=test_df.columns[0],
                                            y_col=test_df.columns[1],
                                            batch_size=batch_size,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="categorical",
                                            target_size=image_size,
                                            color_mode='rgb')

Found 14047 validated image filenames belonging to 2 classes.
Found 3445 validated image filenames belonging to 2 classes.


In [8]:
inceptionv3_celeba = "../input/image-recognition-gender-detection-inceptionv3/weights.best.inc.male.hdf5"
pretrained_model = load_model(inceptionv3_celeba)

## Callbacks

In [9]:
checkpoint = ModelCheckpoint('cutout_model_fold4.h5', 
                             monitor='val_accuracy', 
                             verbose=1, 
                             save_best_only=True, 
                             mode='max')

def step_decay(epoch):
    if epoch < 10:
        lrate = 0.01
    else:
        initial_lrate = 0.005
        drop = 0.5
        epochs_drop = 10.0
        lrate = initial_lrate * math.pow(drop, 
                math.floor((1+epoch)/epochs_drop))
    return lrate
  
lrate = LearningRateScheduler(step_decay)
callbacks_list= [checkpoint, lrate]

In [10]:
TRAIN_STEP_SIZE = train_generator.n//train_generator.batch_size
VAL_STEP_SIZE = val_generator.n//val_generator.batch_size

In [11]:
history = pretrained_model.fit(train_generator,
          steps_per_epoch=TRAIN_STEP_SIZE,
          epochs=30,
          validation_data=val_generator,
          validation_steps=VAL_STEP_SIZE,
          callbacks=callbacks_list)

Epoch 1/30
Epoch 00001: val_accuracy improved from -inf to 0.84901, saving model to cutout_model_fold4.h5
Epoch 2/30
Epoch 00002: val_accuracy improved from 0.84901 to 0.88785, saving model to cutout_model_fold4.h5
Epoch 3/30
Epoch 00003: val_accuracy improved from 0.88785 to 0.89369, saving model to cutout_model_fold4.h5
Epoch 4/30
Epoch 00004: val_accuracy did not improve from 0.89369
Epoch 5/30
Epoch 00005: val_accuracy did not improve from 0.89369
Epoch 6/30
Epoch 00006: val_accuracy improved from 0.89369 to 0.89457, saving model to cutout_model_fold4.h5
Epoch 7/30
Epoch 00007: val_accuracy did not improve from 0.89457
Epoch 8/30
Epoch 00008: val_accuracy improved from 0.89457 to 0.89515, saving model to cutout_model_fold4.h5
Epoch 9/30
Epoch 00009: val_accuracy did not improve from 0.89515
Epoch 10/30
Epoch 00010: val_accuracy did not improve from 0.89515
Epoch 11/30
Epoch 00011: val_accuracy improved from 0.89515 to 0.91472, saving model to cutout_model_fold4.h5
Epoch 12/30
Epoch

In [12]:
np.save("fold4_validation.npy", history.history)