In [1]:
# !pip install git+https://github.com/rcmalli/keras-vggface.git
# !pip install keras_vggface
# !pip install keras_applications

In [2]:
import functools
from collections import defaultdict
from glob import glob
from random import choice, sample

import cv2
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract, LayerNormalization, BatchNormalization, Layer, Flatten
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L2
from tensorflow.python.ops import nn_ops

from tf2_keras_vggface.utils import preprocess_input
from tf2_keras_vggface.vggface import VGGFace


import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

2021-08-12 19:11:51.870492: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


Using VGGFace compatible with TensorFlow2.


In [5]:
# Hyperparameters
# BASE_MODEL is passed to VGGFace(model=...) and is also embedded in MODEL_NAME.
BASE_MODEL = 'vgg16'
# Spatial size of the network input; a channel dimension of 3 is appended when
# the Keras Input layers are created, and the same tuple is handed to cv2.resize.
INPUT_SHAPE = (224, 224,)
# Slice bounds into backbone.layers selecting the layers that become trainable
# when FINE_TUNE is True: layers[IGNORE_BOTTOM_NLAYERS_TUNE:-IGNORE_TOP_NLAYERS_TUNE]
# (the upper bound is dropped when IGNORE_TOP_NLAYERS_TUNE is 0).
# Values used per backbone so far -- senet50: -6; resnet50: -5; vgg16: -2
IGNORE_BOTTOM_NLAYERS_TUNE = -2 #senet50: -6; resnet50:-5; vgg16: -2
IGNORE_TOP_NLAYERS_TUNE = 0
# When False the whole backbone stays frozen and only the layers added on top train.
FINE_TUNE = False
EPOCHS = 25

# Modify paths as per your method of saving them
BASE_PATH = "/root/KinshipRecognition"
# CSV read with pandas; the columns p1, p2 and relationship are used.
TRAIN_FILE_PATH = f"{BASE_PATH}/data/aug_train_ds.csv"
# Root of the face images; globbed as <root>/*/*/*.jpg. The trailing slash matters
# because paths are built by plain string concatenation.
TRAIN_FOLDERS_PATH = f"{BASE_PATH}/data/train/train-faces/"

# Output file stem: checkpoints are written to log/model/<MODEL_NAME>_<fold>.h5 and
# predictions to log/results/<MODEL_NAME>*.csv.
# NOTE(review): the suffix mentions "finetune8" and "drop05" while FINE_TUNE is False
# and the model uses Dropout(0.05) -- confirm the name still describes this run.
MODEL_NAME = f"ensemble_vggface_{BASE_MODEL}_finetune8_dense32-128-32_drop05"

# One model is trained per entry of val_families_list. For each entry, every image
# whose path contains that string (e.g. "F06" matches families F06**) is held out as
# the validation set; everything else is used for training.
# For final submission, you can add these to the training data as well
val_families_list = ["F06"]
# val_families_list = ["F02","F04","F06","F08", "F09"]
# val_families_list = ["F00", "F01", "F02", "F03", "F04", "F05", "F06", "F07", "F08", "F09"]

In [6]:
def get_train_val(family_name):
    """Split images and labelled pairs into training and validation sets.

    Every image under ``TRAIN_FOLDERS_PATH`` whose path contains ``family_name``
    goes to the validation side; all other images go to the training side. The
    relationship rows read from ``TRAIN_FILE_PATH`` are split the same way, based
    on whether ``family_name`` occurs in the first path of the pair.

    Args:
        family_name: Substring identifying the held-out families (e.g. ``"F06"``).

    Returns:
        A 4-tuple ``(train, val, train_person_to_images_map, val_person_to_images_map)``:
        ``train`` / ``val`` are lists of ``(p1, p2, relationship)`` tuples, and the
        two maps are ``defaultdict(list)`` objects from ``"<family>/<member>"``
        (the two directory names above the image file) to that person's image paths.
    """
    val_families = family_name

    def person_key(path):
        # "<...>/<family>/<member>/<file>.jpg" -> "<family>/<member>"
        parts = path.split("/")
        return parts[-3] + "/" + parts[-2]

    all_images = glob(TRAIN_FOLDERS_PATH + "*/*/*.jpg")

    train_person_to_images_map = defaultdict(list)
    val_person_to_images_map = defaultdict(list)
    # A set gives O(1) membership tests when filtering the relationship rows below.
    ppl = set()

    # Single pass over the images; per-person lists keep the glob order.
    for image_path in all_images:
        key = person_key(image_path)
        ppl.add(key)
        if val_families in image_path:
            val_person_to_images_map[key].append(image_path)
        else:
            train_person_to_images_map[key].append(image_path)

    relationships = pd.read_csv(TRAIN_FILE_PATH)
    # Keep only pairs whose two people both have images on disk. The first 10
    # characters of each path are compared against the person keys.
    # NOTE(review): this presumes keys of the form "Fxxxx/MIDy" (exactly 10
    # characters) -- confirm for member ids with more than one digit.
    relationships = [
        (p1, p2, label)
        for p1, p2, label in zip(relationships.p1.values,
                                 relationships.p2.values,
                                 relationships.relationship.values)
        if p1[:10] in ppl and p2[:10] in ppl
    ]

    train = [x for x in relationships if val_families not in x[0]]
    val = [x for x in relationships if val_families in x[0]]
    return train, val, train_person_to_images_map, val_person_to_images_map

In [7]:
def read_img(path, input_shape):
    """Load an image, resize it and min-max scale its values to [0, 255].

    Args:
        path: Path of the image file.
        input_shape: Target size handed to ``cv2.resize`` as ``dsize``.

    Returns:
        The processed image as a ``float64`` NumPy array.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path``. ``cv2.imread`` returns
            ``None`` in that case instead of raising.
    """
    # Flag -1 (cv2.IMREAD_UNCHANGED) loads the file as stored, without colour conversion.
    img = cv2.imread(path, -1)
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    img = cv2.resize(img, input_shape)
    img = cv2.normalize(img, np.zeros(img.shape[:2]), 0, 255, cv2.NORM_MINMAX)
    # np.float was an alias of the builtin float and was removed in NumPy 1.24;
    # np.float64 is the same dtype.
    return np.array(img).astype(np.float64)

In [8]:
def gen(list_tuples, person_to_images_map, input_shape, batch_size=16, normalization='base'):
    """Endlessly yield batches of image pairs with horizontal-flip augmentation.

    Each step draws ``batch_size`` distinct tuples from ``list_tuples``, loads both
    images of every pair and appends a mirrored copy of each, so a yielded batch
    holds ``2 * batch_size`` samples.

    Args:
        list_tuples: Sequence of ``(p1, p2, label)`` tuples; ``p1`` and ``p2`` are
            image paths relative to ``TRAIN_FOLDERS_PATH``.
        person_to_images_map: Unused; kept so existing callers keep working.
        input_shape: Target size forwarded to ``read_img``.
        batch_size: Number of pairs drawn per step, before mirroring.
        normalization: Unused; kept so existing callers keep working.

    Yields:
        ``([X1, X2], labels)`` where ``X1`` and ``X2`` stack the original images
        followed by their mirrored versions, and ``labels`` is the label vector
        repeated twice to match.

    Raises:
        ValueError: Raised by ``random.sample`` when ``list_tuples`` holds fewer
            than ``batch_size`` items.
    """
    while True:
        batch_tuples = sample(list_tuples, batch_size)

        # All the samples are taken from train_ds.csv, labels are in the labels column
        labels = np.array([tup[2] for tup in batch_tuples])

        # Original images preprocessed
        X1 = np.array([read_img(TRAIN_FOLDERS_PATH + tup[0], input_shape) for tup in batch_tuples])
        X2 = np.array([read_img(TRAIN_FOLDERS_PATH + tup[1], input_shape) for tup in batch_tuples])

        # Mirrored images (flip code 1 = flip around the vertical axis)
        X1_mirror = np.asarray([cv2.flip(x, 1) for x in X1])
        X2_mirror = np.asarray([cv2.flip(x, 1) for x in X2])
        X1 = np.r_[X1, X1_mirror]
        X2 = np.r_[X2, X2_mirror]

        yield [X1, X2], np.r_[labels, labels]

In [9]:
class SelfAttentionKernel(Layer):
    """Turn a feature map into a per-sample convolution kernel via self-attention.

    The input ``(N, h, w, c)`` is flattened to ``(N, c, h*w)``. For each of the
    ``units`` output channels, learned query/key projections give a ``(c, c)``
    attention matrix over the input channels, which is applied to a learned value
    projection of length ``kernel_dim2D[0] * kernel_dim2D[1]``. The result is
    reshaped into a kernel of shape ``(N, k_h, k_w, c, units)``.

    Args:
        units: Number of output channels of the generated kernel.
        kernel_dim2D: ``(k_h, k_w)`` spatial size of the generated kernel.
        value_dim: Length of the flattened spatial axis; must equal ``h * w`` of
            the input for the matrix products in ``call`` to be valid.
        output_dim: Width of the query/key projections. Defaults to ``k_h * k_w``.
        kernel_initializer: Initializer for the three projection weights.
        kernel_regularizer: Regularizer for the three projection weights.
    """

    def __init__(self, units, kernel_dim2D, value_dim, output_dim=None,
                 kernel_initializer="glorot_uniform", kernel_regularizer='l2', **kwargs):

        super(SelfAttentionKernel, self).__init__(**kwargs)

        self.units = units # Same as num_output_channels
        self.value_dim = value_dim
        assert len(kernel_dim2D) == 2
        self.kernel_dim2D = kernel_dim2D
        self.kernel_len = kernel_dim2D[0] * kernel_dim2D[1]
        # Fixed: the fallback used the undefined bare name `kernel_len`.
        self.output_dim = output_dim if output_dim else self.kernel_len
        self.kernel_initializer = kernel_initializer
        self.kernel_regularizer = kernel_regularizer

        self.key_w = None
        self.query_w = None
        self.value_w = None
        self.scale = self.value_dim ** -0.5

    def build(self, input_shape):
        """Create the query, key and value projection weights."""
        self.query_w = self.add_weight(shape=(self.units, self.value_dim, self.output_dim),
                                       initializer=self.kernel_initializer,
                                       regularizer=self.kernel_regularizer,
                                       trainable=True)
        self.key_w = self.add_weight(shape=(self.units, self.value_dim, self.output_dim),
                                     initializer=self.kernel_initializer,
                                     regularizer=self.kernel_regularizer,
                                     trainable=True)
        self.value_w = self.add_weight(shape=(self.units, self.value_dim, self.kernel_len),
                                       initializer=self.kernel_initializer,
                                       regularizer=self.kernel_regularizer,
                                       trainable=True)

    def call(self, value):
        """Return the generated kernel, shape ``(N, k_h, k_w, c, units)``."""
        # Input is of shape (N, h, w, c). The batch size is left dynamic (-1 in the
        # reshapes) because it is None while a functional model is being built.
        _, h, w, c = value.shape
        value = tf.transpose(value, perm=(0, 3, 1, 2))
        value = tf.reshape(value, shape=(-1, c, h * w))

        # (N, 1, c, h*w) broadcast against (units, value_dim, d) -> (N, units, c, d)
        expanded = tf.expand_dims(value, 1)
        qW = tf.matmul(expanded, self.query_w)
        kW = tf.matmul(expanded, self.key_w)
        # Channel-to-channel attention scores: (N, units, c, c)
        dot = tf.matmul(qW, tf.transpose(kW, perm=(0, 1, 3, 2,)))
        attn_w = tf.nn.softmax(dot * self.scale)
        vW = tf.matmul(expanded, self.value_w)
        # (N, units, c, c) x (N, units, c, kernel_len) -> (N, units, c, kernel_len)
        flat_kernel = tf.einsum('ijkl, ijlm -> ijkm', attn_w, vW)
        kernel = tf.reshape(
            flat_kernel,
            shape=(-1, self.units, c, self.kernel_dim2D[0], self.kernel_dim2D[1]))
        kernel = tf.transpose(kernel, perm=(0, 3, 4, 2, 1))
        return kernel

    def get_config(self):
        """Return the constructor arguments so models using this layer can be saved."""
        config = super(SelfAttentionKernel, self).get_config()
        config.update({
            "units": self.units,
            "kernel_dim2D": list(self.kernel_dim2D),
            "value_dim": self.value_dim,
            "output_dim": self.output_dim,
            "kernel_initializer": tf.keras.initializers.serialize(
                tf.keras.initializers.get(self.kernel_initializer)),
            "kernel_regularizer": tf.keras.regularizers.serialize(
                tf.keras.regularizers.get(self.kernel_regularizer)),
        })
        return config
    


In [10]:
class CrossAttentionConv2D(Layer):
    """Convolve each sample of a batch with its own, per-sample kernel.

    The batch axis is folded into the channel axis so that one depthwise
    convolution applies every sample's kernel to that sample only; the per-input-
    channel responses are then summed, which matches an ordinary convolution.

    Args:
        strides: Strides forwarded to ``tf.nn.depthwise_conv2d``.
        padding: Padding mode forwarded to ``tf.nn.depthwise_conv2d``.
        data_format: Forwarded to ``tf.nn.depthwise_conv2d``. The reshapes in
            ``call`` are written for channels-last tensors, so leave this at
            ``None`` or ``'NHWC'``.
        dilations: Dilation rate forwarded to ``tf.nn.depthwise_conv2d``.
    """

    def __init__(self, strides=[1, 1, 1, 1], padding='VALID',
                 data_format=None, dilations=None, **kwargs):

        super(CrossAttentionConv2D, self).__init__(**kwargs)

        # Copy so that the shared default list (or a caller's list) is never aliased.
        self.strides = list(strides)
        self.padding = padding
        self.data_format = data_format
        self.dilations = dilations

    def call(self, input_img, kernel):
        """Apply ``kernel`` ``(N, k_h, k_w, in_c, out_c)`` to ``input_img`` ``(N, h, w, in_c)``.

        Returns:
            Tensor of shape ``(N, out_h, out_w, out_c)``.
        """
        # Match batch and channels dimensions
        assert kernel.shape[0] == input_img.shape[0]
        assert kernel.shape[3] == input_img.shape[3]

        _, h, w, in_c = input_img.shape
        _, k_h, k_w, _, out_c = kernel.shape

        # The batch size is None while a functional model is being built, so it is
        # never used as a Python int; -1 lets tf.reshape infer the folded axis.
        # Kernel shape -> (k_h, k_w, n*in_c, out_c)
        kernel = tf.transpose(kernel, perm=(1, 2, 0, 3, 4))
        kernel = tf.reshape(kernel, shape=(k_h, k_w, -1, out_c))
        # Inputs shape -> (1, h, w, n*in_c)
        input_img = tf.transpose(input_img, perm=(1, 2, 0, 3))
        input_img = tf.reshape(input_img, shape=(1, h, w, -1))
        # Do depth-wise convolution. Keyword arguments are required here: the fifth
        # positional parameter of depthwise_conv2d is data_format, not dilations.
        conv_out = tf.nn.depthwise_conv2d(input_img, kernel,
                                          strides=self.strides,
                                          padding=self.padding,
                                          data_format=self.data_format,
                                          dilations=self.dilations)
        # Output shape -> (n, out_h, out_w, out_c)
        out_h, out_w = conv_out.shape[1], conv_out.shape[2]
        if out_h is None or out_w is None:
            # Fall back to run-time sizes when static shape inference is incomplete.
            dynamic_shape = tf.shape(conv_out)
            out_h, out_w = dynamic_shape[1], dynamic_shape[2]
        conv_out = tf.reshape(conv_out, shape=[out_h, out_w, -1, in_c, out_c])
        conv_out = tf.transpose(conv_out, perm=(2, 0, 1, 3, 4))
        # Sum the contributions of all input channels.
        conv_out = tf.reduce_sum(conv_out, axis=3)

        return conv_out

    def get_config(self):
        """Return the constructor arguments so models using this layer can be saved."""
        config = super(CrossAttentionConv2D, self).get_config()
        config.update({
            "strides": list(self.strides),
            "padding": self.padding,
            "data_format": self.data_format,
            "dilations": list(self.dilations) if self.dilations is not None else None,
        })
        return config

In [None]:
class ConvTransformer(Layer):
    """Cross-attend two feature maps through dynamically generated conv kernels.

    A kernel is generated from each input with ``SelfAttentionKernel`` and applied
    to the *other* input with ``CrossAttentionConv2D``. Each result is normalized,
    flattened over its spatial axes, passed through a shared dense block and
    reshaped to ``out_dim2D``.

    Args:
        units: Number of output channels used when ``out_channels`` is ``None``.
        value_dim: ``H * W`` of the input feature maps.
        kernel_dim2D: ``(k_h, k_w)`` size of the generated kernels.
        out_dim2D: ``(H_out, W_out)`` spatial size of the two outputs.
        out_channels: Channels of the generated kernels and of the outputs.
        strides, padding, data_format, dilations: Forwarded to
            ``CrossAttentionConv2D``.
        dense_config: Optional iterable of ``(width, activation)`` pairs describing
            hidden Dense layers placed before the final linear projection.
        kernel_initializer, kernel_regularizer: Forwarded to ``SelfAttentionKernel``.
    """

    def __init__(self, units, value_dim, kernel_dim2D, out_dim2D, out_channels=None,
                 strides=[1, 1, 1, 1], padding='VALID', dense_config=None,
                 kernel_initializer="glorot_uniform", kernel_regularizer='l2',
                 data_format=None, dilations=None, **kwargs):

        # value_dim = H*W

        super(ConvTransformer, self).__init__(**kwargs)
        self.units = units
        self.value_dim = value_dim
        self.kernel_dim2D = kernel_dim2D
        self.out_dim2D = out_dim2D
        # Previously out_channels=None was passed straight through and broke
        # weight creation; fall back to `units`.
        self.out_channels = out_channels if out_channels is not None else units
        self.strides = list(strides)
        self.padding = padding
        self.dense_config = dense_config
        self.kernel_initializer = kernel_initializer
        self.kernel_regularizer = kernel_regularizer
        self.data_format = data_format
        self.dilations = dilations

        # Normalize over the spatial axes (H, W). The former axis=[0, 1] mixed
        # statistics across the batch axis, making outputs depend on batch content.
        self.norm = LayerNormalization(axis=[1, 2], epsilon=.0001, center=False, scale=False)
        # The constructor arguments are now honoured; the kernel size used to be
        # hard-coded to (3, 3) and strides/initializer/regularizer were ignored.
        self.self_attn = SelfAttentionKernel(self.out_channels, kernel_dim2D, value_dim,
                                             output_dim=self.out_channels,
                                             kernel_initializer=kernel_initializer,
                                             kernel_regularizer=kernel_regularizer)
        self.cross_conv = CrossAttentionConv2D(strides=self.strides, padding=padding,
                                               data_format=data_format, dilations=dilations)
        self.dense_block = Sequential()
        if dense_config:
            for i in dense_config:
                self.dense_block.add(Dense(i[0], activation=i[1]))
        self.dense_block.add(Dense(out_dim2D[0] * out_dim2D[1], activation='linear'))

    def call(self, x1, x2):
        """Return the pair ``(out_x1, out_x2)``, each ``(N, H_out, W_out, out_c)``."""
        # is_compatible_with tolerates the unknown batch dimension present while a
        # functional model is being built.
        assert x1.shape.is_compatible_with(x2.shape)

        # Normalize input images by (H,W)
        x1 = self.norm(x1)
        x2 = self.norm(x2)
        # Output from cross attention: the kernel built from one input filters the other.
        conv_x1 = self.norm(self.cross_conv(x2, self.self_attn(x1)))
        conv_x2 = self.norm(self.cross_conv(x1, self.self_attn(x2)))

        _, h, w, out_c = conv_x1.shape
        # Flatten (H, W) dimension; -1 keeps the batch size dynamic.
        conv_x1 = tf.transpose(conv_x1, perm=(0, 3, 1, 2))
        conv_x2 = tf.transpose(conv_x2, perm=(0, 3, 1, 2))
        conv_x1 = tf.reshape(conv_x1, shape=(-1, out_c, h * w))
        conv_x2 = tf.reshape(conv_x2, shape=(-1, out_c, h * w))
        # Feed into dense network (weights shared between the two branches)
        out_x1 = self.dense_block(conv_x1)
        out_x2 = self.dense_block(conv_x2)
        # Reshape to shape of image.
        out_x1 = tf.reshape(out_x1, shape=(-1, out_c, self.out_dim2D[0], self.out_dim2D[1]))
        out_x2 = tf.reshape(out_x2, shape=(-1, out_c, self.out_dim2D[0], self.out_dim2D[1]))
        out_x1 = tf.transpose(out_x1, perm=(0, 2, 3, 1))
        out_x2 = tf.transpose(out_x2, perm=(0, 2, 3, 1))
        out_x1 = self.norm(out_x1)
        out_x2 = self.norm(out_x2)

        return out_x1, out_x2

    def get_config(self):
        """Return the constructor arguments so models using this layer can be saved."""
        config = super(ConvTransformer, self).get_config()
        config.update({
            "units": self.units,
            "value_dim": self.value_dim,
            "kernel_dim2D": list(self.kernel_dim2D),
            "out_dim2D": list(self.out_dim2D),
            "out_channels": self.out_channels,
            "strides": list(self.strides),
            "padding": self.padding,
            "dense_config": ([list(item) for item in self.dense_config]
                             if self.dense_config else None),
            "kernel_initializer": tf.keras.initializers.serialize(
                tf.keras.initializers.get(self.kernel_initializer)),
            "kernel_regularizer": tf.keras.regularizers.serialize(
                tf.keras.regularizers.get(self.kernel_regularizer)),
            "data_format": self.data_format,
            "dilations": list(self.dilations) if self.dilations is not None else None,
        })
        return config

In [None]:
def baseline_model(model_name, fine_tune=True):
    """Build and compile the two-input kinship classifier.

    Both images go through the same VGGFace backbone. The two feature maps are
    combined by a ``ConvTransformer``, flattened, concatenated and fed to a small
    dense head ending in a single sigmoid unit.

    Args:
        model_name: Backbone name passed to ``VGGFace(model=...)``.
        fine_tune: When True, the backbone layers in
            ``layers[IGNORE_BOTTOM_NLAYERS_TUNE:-IGNORE_TOP_NLAYERS_TUNE]`` (upper
            bound dropped when ``IGNORE_TOP_NLAYERS_TUNE`` is 0) are made trainable.
            Otherwise the whole backbone stays frozen.

    Returns:
        The compiled ``tf.keras`` model (binary cross-entropy loss, Adam optimizer,
        accuracy metric).
    """
    input_1 = Input(shape=INPUT_SHAPE + (3,))
    input_2 = Input(shape=INPUT_SHAPE + (3,))

    backbone = VGGFace(model=model_name, include_top=False)
    # Freeze everything first, then selectively unfreeze the requested slice.
    for layer in backbone.layers:
        layer.trainable = False

    if fine_tune:
        # A stop of None means "through the last layer"; -0 would give an empty slice.
        stop = -IGNORE_TOP_NLAYERS_TUNE if IGNORE_TOP_NLAYERS_TUNE != 0 else None
        for layer in backbone.layers[IGNORE_BOTTOM_NLAYERS_TUNE:stop]:
            layer.trainable = True

    for layer in backbone.layers:
        print(layer.name, layer.trainable)

    x1 = backbone(input_1)
    x2 = backbone(input_2)

    # value_dim must equal H*W of the backbone feature map. It is derived from the
    # tensor instead of being hard-coded to 49, which is only right for a 7x7 map.
    value_dim = int(x1.shape[1]) * int(x1.shape[2])
    conv_transformer = ConvTransformer(256, value_dim, (2, 2), (4, 4), out_channels=256,
                                       strides=[1, 1, 1, 1], padding='SAME',
                                       dense_config=((1024, 'relu',),))
    x1, x2 = conv_transformer(x1, x2)
    # Flatten is a layer: it has to be instantiated and then called on the tensor.
    x = Concatenate(axis=-1)([Flatten()(x1), Flatten()(x2)])

#     x = LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True)(x)
    x = Dense(32, activation="relu")(x)
    x = Dropout(0.05)(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.05)(x)
    x = Dense(32, activation="tanh")(x)
#     x = LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=False)(x)
    x = Dropout(0.05)(x)
    out = Dense(1, kernel_regularizer=L2(.01), activation="sigmoid")(x)

    model = Model([input_1, input_2], out)
    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.00002))
    model.summary()

    return model

In [None]:
# Train one model per validation split; the best weights of split i (highest
# val_acc) are written to log/model/<MODEL_NAME>_<i>.h5.
for i, val_family in enumerate(val_families_list):

    print('##############################')
    print(f'Iteration {i}: Validation on {val_family}')
    print('##############################')

    train, val, train_person_to_images_map, val_person_to_images_map = get_train_val(val_family)
    file_path = f"{BASE_PATH}/log/model/{MODEL_NAME}_{i}.h5"
    # The checkpoint callback does not create missing directories.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # Only the weights are saved: the prediction cell rebuilds the architecture and
    # calls load_weights, and saving the full model would require every custom
    # layer to be serializable.
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1,
                                 save_best_only=True, save_weights_only=True, mode='max')
    # NOTE(review): patience=30 is larger than EPOCHS (25), so this callback can
    # never reduce the learning rate with the current settings -- confirm intent.
    reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.3, patience=30, verbose=1)
    callbacks_list = [checkpoint, reduce_on_plateau]

    model = baseline_model(BASE_MODEL, fine_tune=FINE_TUNE)

    # Both generators are infinite, so the epoch length is fixed by the step counts.
    history = model.fit(gen(train, train_person_to_images_map, INPUT_SHAPE, batch_size=16),
                        validation_data=gen(val, val_person_to_images_map, INPUT_SHAPE, batch_size=16),
                        epochs=EPOCHS, steps_per_epoch=300, validation_steps=200,
                        verbose=1, callbacks=callbacks_list,
                        use_multiprocessing=False, workers=1)

In [None]:
# Score every test pair with each trained split model and average the scores.
test_path = f"{BASE_PATH}/data/test/"
submission = pd.read_csv(f'{BASE_PATH}/data/test_ds.csv')
preds_for_sub = np.zeros(submission.shape[0])
all_preds = list()

# Create the output directory before the expensive inference, so the final
# to_csv calls cannot fail on a missing folder after all predictions are done.
results_dir = f"{BASE_PATH}/log/results"
os.makedirs(results_dir, exist_ok=True)

# Column arrays are loop-invariant; read them once.
p1_paths = submission.p1.values
p2_paths = submission.p2.values

for i, val_family in enumerate(val_families_list):

    print('##############################')
    print(f'Iteration {i}: Validation on {val_family}')
    print('##############################')

    # Rebuild the architecture, then restore the best weights saved for split i.
    model = baseline_model(BASE_MODEL, fine_tune=FINE_TUNE)
    file_path = f"{BASE_PATH}/log/model/{MODEL_NAME}_{i}.h5"
    model.load_weights(file_path)

    # Predictions, 32 pairs at a time
    predictions = []
    for j in range(0, len(p1_paths), 32):
        X1 = np.array([read_img(test_path + x, INPUT_SHAPE) for x in p1_paths[j:j+32]])
        X2 = np.array([read_img(test_path + x, INPUT_SHAPE) for x in p2_paths[j:j+32]])

        predictions.extend(model.predict([X1, X2]).ravel().tolist())

    predictions = np.array(predictions)
    all_preds.append(predictions)
    # Running mean over the split models.
    preds_for_sub += predictions / len(val_families_list)


# One column per split model, one row per test pair.
all_preds = np.asarray(all_preds).T
submission['score'] = preds_for_sub
pd.DataFrame(all_preds).to_csv(f"{BASE_PATH}/log/results/{MODEL_NAME}_allpreds.csv", index=False)
submission.to_csv(f"{BASE_PATH}/log/results/{MODEL_NAME}.csv", index=False)

In [None]:
# Quick sanity check on the averaged scores: how many pairs fall at or below the
# 0.5 threshold, how many pairs there are in total, then each score on its own line.
at_or_below_half = preds_for_sub <= 0.5
print(np.sum(at_or_below_half))
total_pairs = len(preds_for_sub)
print(total_pairs, '\n')
for score in preds_for_sub:
    print(score)