In [1]:
import os
import pyvips
import warnings
import fnmatch
import pandas as pd
import numpy as np
import imgaug as ia
import tensorflow as tf

from sklearn.model_selection import StratifiedKFold
from imgaug import augmenters as iaa
from sklearn.utils import class_weight
from scipy.misc import imresize

from keras import backend as K
from keras.utils import Sequence
from keras.applications import DenseNet201
from keras.layers import Dense, BatchNormalization, GlobalMaxPool2D
from keras.regularizers import l2
from keras.initializers import he_normal
from keras.optimizers import SGD, Adam
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler
from keras.models import load_model

Using TensorFlow backend.


In [2]:
TRAIN_REAL_FOLDER = 'D:/Datasets/IDRND Data/train/real'
TRAIN_SPOOF_FOLDER = 'D:/Datasets/IDRND Data/train/spoof'
VALIDATION_REAL_FOLDER = 'D:/Datasets/IDRND Data/validation/real'
VALIDATION_SPOOF_FOLDER = 'D:/Datasets/IDRND Data/validation/spoof'
TEST_FOLDER = 'D:/Datasets/IDRND Data/test'

FOLDS = 5
RANDOM_STATE = 17
IMAGE_HEIGHT = 400
IMAGE_WIDTH = 400
BATCH_SIZE = 4
REGULARIZER = 0.0001

warnings.filterwarnings('ignore')

In [3]:
def read_pyvips_image(path):
    image = pyvips.Image.new_from_file(path, access='sequential')
    memory_image = image.write_to_memory()
    numpy_image = np.ndarray(buffer=memory_image,
                             dtype=np.uint8,
                             shape=[image.height, image.width, image.bands])
    
    return numpy_image

def dataframe_to_fold_dict(dataframe, folds_indexes, fold_number, for_train=True):
    if for_train:
        fold = dataframe.iloc[folds_indexes[fold_number][0], :]
    else:
        fold = dataframe.iloc[folds_indexes[fold_number][1], :]
    return dict(zip(fold['ImagePathway'], fold['Label']))

#### VALIDATION CREATING

In [4]:
def get_image_names(folder, extension='*.png'):
    image_names = fnmatch.filter(os.listdir(folder), extension)
    return list(map(lambda image_name: os.path.join(folder, image_name), image_names))

def get_merged_image_pathways(real_folder, spoof_folder):
    return get_image_names(real_folder) + get_image_names(spoof_folder)

real = pd.DataFrame({'ImagePathway': get_merged_image_pathways(TRAIN_REAL_FOLDER, VALIDATION_REAL_FOLDER),
                     'Label': 0})
spoof = pd.DataFrame({'ImagePathway': get_merged_image_pathways(TRAIN_SPOOF_FOLDER, VALIDATION_SPOOF_FOLDER),
                      'Label': 1})

train = pd.concat([real, spoof])
train = train.sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)
skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=RANDOM_STATE)
folds_indexes = list(skf.split(train['ImagePathway'], train['Label']))

class_weights = class_weight.compute_class_weight('balanced', np.unique(train['Label']), train['Label'])

#### AUGMENTATIONS

In [5]:
augmentations_pipline = iaa.Sequential([
    iaa.Sometimes(0.8, iaa.OneOf([
        iaa.Multiply((0.3, 2.5), per_channel=0.5),
        iaa.ContrastNormalization((0.5, 1.5), per_channel=0.8),
        iaa.Grayscale(alpha=(0.5, 1.0)),
    ])),
    
    iaa.OneOf([
        iaa.Affine(scale=(1, 1.8), rotate=(0, 360), shear=(0, 20), backend='cv2'),
        iaa.PerspectiveTransform(scale=(0.01, 0.10)),
        iaa.PiecewiseAffine(scale=(0.01, 0.05)),
        iaa.Sequential([
            iaa.Fliplr(0.6),
            iaa.Flipud(0.3),
        ])
    ])
])

In [6]:
class DataGenerator(Sequence):
    def __init__(self, pathways_with_labels, augmentations_pipline=None, target_size=(400, 400, 3), batch_size=32, shuffle=True):
        self.pathways_with_labels = pathways_with_labels
        self.pathways = list(pathways_with_labels.keys())
        self.augmentations_pipline = augmentations_pipline
        self.target_size = target_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()
        
        
    def __len__(self):
        return int(np.ceil(len(self.pathways) / float(self.batch_size)))
        
        
    def __getitem__(self, index):
        batch_pathways = self.pathways[index*self.batch_size : (index + 1)*self.batch_size]
        batch_size = np.minimum(self.batch_size, len(batch_pathways))
        batch_x = np.empty((batch_size, *self.target_size), dtype=np.uint8)
        batch_y = np.empty(batch_size, dtype=np.int)

        for i, pathway in enumerate(batch_pathways):
            image = read_pyvips_image(pathway)
            image = imresize(image, (self.target_size[0], self.target_size[1]))
            batch_x[i, ...] = image
            batch_y[i] = self.pathways_with_labels[pathway]
            
        if self.augmentations_pipline:
            batch_x = self.augmentations_pipline.augment_images(batch_x)
            
        batch_x = (batch_x / 256.) - 0.5
        return batch_x, batch_y
        
        
    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.pathways)

#### BUILD MODEL

In [7]:
model_base = DenseNet201(weights='imagenet',
                         include_top=False,
                         input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3))

gmp = GlobalMaxPool2D() (model_base.output)
fc1 = Dense(1028,
            activation='elu',
            kernel_regularizer=l2(REGULARIZER),
            kernel_initializer=he_normal(RANDOM_STATE),
            use_bias=True,
            bias_regularizer=l2(REGULARIZER),
            bias_initializer=he_normal(RANDOM_STATE)) (gmp)
bn1  = BatchNormalization() (fc1)
fc2  = Dense(512,
             activation='elu',
             kernel_regularizer=l2(REGULARIZER),
             kernel_initializer=he_normal(RANDOM_STATE),
             use_bias=True,
             bias_regularizer=l2(REGULARIZER),
             bias_initializer=he_normal(RANDOM_STATE)) (bn1)
bn2  = BatchNormalization() (fc2)
output = Dense(1, activation='sigmoid') (bn2)

model = Model(input=model_base.input, output=output)

In [8]:
model_base.trainable = True
set_trainable = False
for layer in model_base.layers:
    if layer.name == 'global_max_pooling2d_1':
        set_trainable = True
    if set_trainable:
        layer.trainable = True
    else:
        layer.trainable = False

#### TRAINING FREEZE MODEL

In [9]:
def roc_auc(y_true, y_pred):
    roc_auc = tf.metrics.auc(y_true, y_pred)[1]
    K.get_session().run(tf.local_variables_initializer())
    return roc_auc

def roc_auc_loss(y_true, y_pred):
    with tf.name_scope('RocAucScore'):
        pos = tf.boolean_mask(y_pred, tf.cast(y_true, tf.bool))
        neg = tf.boolean_mask(y_pred, ~tf.cast(y_true, tf.bool))
        pos = tf.expand_dims(pos, 0)
        neg = tf.expand_dims(neg, 1)
        # original paper suggests performance is robust to exact parameter choice
        gamma = 0.2
        p     = 3
        difference = tf.zeros_like(pos * neg) + pos - neg - gamma
        masked = tf.boolean_mask(difference, difference < 0.0)
        return tf.reduce_sum(tf.pow(-masked, p))

model.compile(loss=roc_auc_loss,
              optimizer=SGD(0.001, momentum=0.9),
              metrics=[roc_auc])

In [10]:
def exp_decay(epoch):
    initial_learning_rate = 0.001
    k = 0.1
    learning_rate = initial_learning_rate * np.exp(-k*epoch)
    return learning_rate

learning_rate = LearningRateScheduler(exp_decay,
                                      verbose=1)
model_checkpoint = ModelCheckpoint('DenseNet201_Freeze_Model1.h5',
                                   monitor='val_roc_auc',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='max',
                                   period=1)
early_stopping = EarlyStopping(monitor='val_roc_auc',
                               min_delta=0,
                               patience=3,
                               verbose=1,
                               mode='max')
callbacks = [early_stopping, learning_rate, model_checkpoint]

In [11]:
fold_train_pathways_with_labels = dataframe_to_fold_dict(train, 
                                                         folds_indexes=folds_indexes, 
                                                         fold_number=1, 
                                                         for_train=True)

fold_valid_pathways_with_labels = dataframe_to_fold_dict(train, 
                                                         folds_indexes=folds_indexes, 
                                                         fold_number=1, 
                                                         for_train=False)

train_generator = DataGenerator(fold_train_pathways_with_labels,
                                augmentations_pipline=augmentations_pipline,
                                target_size=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                                batch_size=BATCH_SIZE,
                                shuffle=True)

valid_generator = DataGenerator(fold_valid_pathways_with_labels,
                                augmentations_pipline=None,
                                target_size=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
                                batch_size=BATCH_SIZE,
                                shuffle=True)

history = [model.fit_generator(train_generator,
                               epochs=10,
                               callbacks=callbacks,
                               validation_data=valid_generator,
                               verbose=1,
                               class_weight=class_weights,
                               workers=4,
                               use_multiprocessing=False)
]

Epoch 1/10

Epoch 00001: LearningRateScheduler setting learning rate to 0.001.

Epoch 00001: val_roc_auc improved from -inf to 0.75514, saving model to DenseNet201_Freeze_Model1.h5
Epoch 2/10

Epoch 00002: LearningRateScheduler setting learning rate to 0.0009048374180359595.

Epoch 00002: val_roc_auc improved from 0.75514 to 0.75804, saving model to DenseNet201_Freeze_Model1.h5
Epoch 3/10

Epoch 00003: LearningRateScheduler setting learning rate to 0.0008187307530779819.

Epoch 00003: val_roc_auc improved from 0.75804 to 0.76148, saving model to DenseNet201_Freeze_Model1.h5
Epoch 4/10

Epoch 00004: LearningRateScheduler setting learning rate to 0.0007408182206817179.

Epoch 00004: val_roc_auc improved from 0.76148 to 0.76996, saving model to DenseNet201_Freeze_Model1.h5
Epoch 5/10

Epoch 00005: LearningRateScheduler setting learning rate to 0.0006703200460356394.

Epoch 00005: val_roc_auc improved from 0.76996 to 0.77154, saving model to DenseNet201_Freeze_Model1.h5
Epoch 6/10

Epoch 0

#### TRAINING MODEL

In [15]:
model = load_model('DenseNet201_Freeze_Model1.h5', custom_objects={'roc_auc_loss': roc_auc_loss, 'roc_auc': roc_auc})
for layer in model.layers:
    layer.trainable = True

In [16]:
def exp_decay(epoch):
    initial_learning_rate = 0.0001
    k = 0.1
    learning_rate = initial_learning_rate * np.exp(-k*epoch)
    
    return learning_rate

learning_rate = LearningRateScheduler(exp_decay, verbose=1)
model_checkpoint = ModelCheckpoint('DenseNet201_Model1.h5',
                                   monitor='val_roc_auc',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False,
                                   mode='max',
                                   period=1)

early_stopping = EarlyStopping(monitor='val_roc_auc',
                               min_delta=0,
                               patience=10,
                               verbose=1,
                               mode='max')

callbacks = [early_stopping, model_checkpoint, learning_rate]

In [None]:
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=0.0001, decay=0.0001),
              metrics=['accuracy', roc_auc])

history = [model.fit_generator(train_generator,
                               epochs=70,
                               callbacks=callbacks,
                               validation_data=valid_generator,
                               verbose=1,
                               class_weight=class_weights,
                               workers=4,
                               use_multiprocessing=False)
]

Epoch 1/70

Epoch 00001: LearningRateScheduler setting learning rate to 0.0001.

Epoch 00001: val_roc_auc improved from -inf to 0.77884, saving model to DenseNet201_Model1.h5
Epoch 2/70

Epoch 00002: LearningRateScheduler setting learning rate to 9.048374180359596e-05.

Epoch 00002: val_roc_auc improved from 0.77884 to 0.81805, saving model to DenseNet201_Model1.h5
Epoch 3/70

Epoch 00003: LearningRateScheduler setting learning rate to 8.187307530779819e-05.

Epoch 00003: val_roc_auc improved from 0.81805 to 0.85216, saving model to DenseNet201_Model1.h5
Epoch 4/70

Epoch 00004: LearningRateScheduler setting learning rate to 7.408182206817179e-05.

Epoch 00004: val_roc_auc improved from 0.85216 to 0.87761, saving model to DenseNet201_Model1.h5
Epoch 5/70

Epoch 00005: LearningRateScheduler setting learning rate to 6.703200460356394e-05.

Epoch 00005: val_roc_auc improved from 0.87761 to 0.89725, saving model to DenseNet201_Model1.h5
Epoch 6/70

Epoch 00006: LearningRateScheduler settin