In [1]:
# !pip install keras_vggface
# !pip install keras_applications
# !pip install deepface

In [2]:
from collections import defaultdict
from glob import glob
from random import choice, sample

import cv2
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract, LayerNormalization, BatchNormalization
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L2

from deepface.commons.functions import find_input_shape, normalize_input
from deepface.DeepFace import build_model

from tqdm import tqdm

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

2021-08-11 05:39:58.469055: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [3]:
# Hyperparameters
BASE_MODEL = 'Facenet'           # model name handed to deepface's build_model
IGNORE_TOP_NLAYERS_ARCH = 5      # the backbone is cut at layers[-IGNORE_TOP_NLAYERS_ARCH]
IGNORE_BOTTOM_NLAYERS_TUNE = 0   # number of leading backbone layers kept frozen when fine-tuning
IGNORE_TOP_NLAYERS_TUNE = 0      # number of trailing backbone layers kept frozen when fine-tuning (0 = none)
NORMALIZATION = 'base'           # normalization mode forwarded to deepface's normalize_input
FINE_TUNE = False                # False leaves every backbone layer frozen

# Modify paths as per your method of saving them
BASE_PATH = "/root/KinshipRecognition"
TRAIN_FILE_PATH = f"{BASE_PATH}/data/aug_train_ds.csv"
TRAIN_FOLDERS_PATH = f"{BASE_PATH}/data/train/train-faces/"

# One model is trained per entry of this list. For each entry, every image whose path
# contains the string (e.g. "F09" selects families F09**) is held out as the validation
# set and all remaining images are used for training.
# For final submission, you can add these to the training data as well
# val_families_list = ["F06"]
val_families_list = ["F02","F04","F06","F08", "F09"]

# Output file
MODEL_NAME = f"ensemble_deepface_{BASE_MODEL}_notune_dense32-128-32_drop05"

# Input image size expected by the chosen backbone
# (a tuple; presumably the two spatial dimensions, e.g. (160, 160) for Facenet - confirm).
INPUT_SHAPE = find_input_shape(build_model(BASE_MODEL))

2021-08-11 05:39:59.512521: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-08-11 05:39:59.513381: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-08-11 05:39:59.615340: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:d8:00.0 name: Tesla V100-PCIE-32GB computeCapability: 7.0
coreClock: 1.38GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2021-08-11 05:39:59.615376: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2021-08-11 05:39:59.616967: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2021-08-11 05:39:59.617025: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2

In [4]:
def get_train_val(family_name):
    """Split the face images and the labelled pairs into train / validation parts.

    An image (or pair) belongs to the validation part when ``family_name`` occurs
    as a substring of its path (for pairs: of the first path, ``p1``); everything
    else belongs to the training part.

    Args:
        family_name: Substring identifying the held-out families (e.g. ``"F09"``).

    Returns:
        A 4-tuple ``(train, val, train_person_to_images_map, val_person_to_images_map)``:
        ``train`` / ``val`` are lists of ``(p1, p2, relationship)`` tuples read from
        ``TRAIN_FILE_PATH``; the two maps are ``defaultdict(list)`` objects keyed by
        ``"<family dir>/<person dir>"`` holding the image paths of that person.
    """
    val_families = family_name

    def person_key(path):
        # "<family dir>/<person dir>": the two directories directly above the image file.
        parts = path.split("/")
        return parts[-3] + "/" + parts[-2]

    all_images = glob(TRAIN_FOLDERS_PATH + "*/*/*.jpg")

    # A set gives O(1) membership tests in the pair filter below
    # (the original list made that filter quadratic).
    ppl = {person_key(x) for x in all_images}

    train_person_to_images_map = defaultdict(list)
    val_person_to_images_map = defaultdict(list)
    for x in all_images:
        if val_families in x:
            val_person_to_images_map[person_key(x)].append(x)
        else:
            train_person_to_images_map[person_key(x)].append(x)

    relationships = pd.read_csv(TRAIN_FILE_PATH)
    # Keep only pairs whose two persons both have images on disk.
    # NOTE(review): the person key is taken as the first 10 characters of the pair path,
    # which presumably assumes keys shaped like "F0002/MID1" - longer member ids
    # (e.g. "MID10") would be truncated; confirm against the CSV.
    relationships = [
        (p1, p2, rel)
        for p1, p2, rel in zip(relationships.p1.values,
                               relationships.p2.values,
                               relationships.relationship.values)
        if p1[:10] in ppl and p2[:10] in ppl
    ]

    train = [x for x in relationships if val_families not in x[0]]
    val = [x for x in relationships if val_families in x[0]]
    return train, val, train_person_to_images_map, val_person_to_images_map

In [5]:
def read_img(path, input_shape, normalization='base'):
    """Load one image from disk and preprocess it for the backbone.

    Steps: read the file unchanged (``cv2.imread`` flag ``-1``), resize to
    ``input_shape``, min-max rescale to the range 0..255, then apply deepface's
    ``normalize_input`` with the requested mode.

    Args:
        path: Path of the image file.
        input_shape: Target size passed to ``cv2.resize``.
        normalization: Mode forwarded to ``normalize_input`` (default ``'base'``).

    Returns:
        The preprocessed image as a float64 NumPy array.

    Raises:
        OSError: If ``cv2.imread`` could not load the file (missing or unreadable).
    """
    img = cv2.imread(path, -1)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising; fail here
        # with the offending path rather than with an opaque error inside cv2.resize.
        raise OSError(f"cv2.imread could not load image (missing or unreadable file): {path}")
    img = cv2.resize(img, input_shape)
    img = cv2.normalize(img, np.zeros(img.shape[:2]), 0, 255, cv2.NORM_MINMAX)
    # Honour the caller's mode (previously the global NORMALIZATION was used and the
    # parameter was silently ignored).
    img = normalize_input(img, normalization=normalization)
    # np.float was only an alias of the builtin float (float64) and no longer exists
    # in current NumPy releases.
    return np.array(img).astype(np.float64)

In [6]:
def gen(list_tuples, person_to_images_map, input_shape, batch_size=16, normalization='base'):
    """Endless batch generator of image pairs and their labels.

    Each step draws ``batch_size`` distinct tuples at random from ``list_tuples``,
    loads both images of every pair, and appends a horizontally mirrored copy of
    the batch, so every yielded batch holds ``2 * batch_size`` pairs.

    Args:
        list_tuples: Sequence of ``(p1, p2, label)`` tuples; ``p1`` / ``p2`` are image
            paths relative to ``TRAIN_FOLDERS_PATH``.
        person_to_images_map: Not used by the generator; kept for call compatibility.
        input_shape: Target image size forwarded to ``read_img``.
        batch_size: Number of pairs sampled per step (before mirroring).
        normalization: Normalization mode forwarded to ``read_img``.

    Yields:
        ``([X1, X2], labels)`` where ``X1`` / ``X2`` stack the first / second images
        (originals followed by their mirrored versions) and ``labels`` is the label
        array repeated twice to match.

    Raises:
        ValueError: From ``random.sample`` if ``list_tuples`` has fewer than
            ``batch_size`` entries.
    """
    while True:
        batch_tuples = sample(list_tuples, batch_size)

        # All the samples come from the pairs CSV; the label is the third tuple element
        # (the `relationship` column).
        labels = np.array([tup[2] for tup in batch_tuples])

        # Original images preprocessed
        X1 = np.array([read_img(TRAIN_FOLDERS_PATH + tup[0], input_shape, normalization=normalization)
                       for tup in batch_tuples])
        X2 = np.array([read_img(TRAIN_FOLDERS_PATH + tup[1], input_shape, normalization=normalization)
                       for tup in batch_tuples])

        # Mirrored images (flip code 1 = flip around the vertical axis)
        X1_mirror = np.asarray([cv2.flip(x, 1) for x in X1])
        X2_mirror = np.asarray([cv2.flip(x, 1) for x in X2])
        X1 = np.r_[X1, X1_mirror]
        X2 = np.r_[X2, X2_mirror]

        yield [X1, X2], np.r_[labels, labels]

In [7]:
def baseline_model(model_name, fine_tune=True):
    """Build and compile the two-input pair classifier.

    The same truncated backbone instance is applied to both inputs, so its weights
    are shared between the two branches. Each branch is global-average-pooled and
    layer-normalized, the two feature vectors are combined element-wise, and a small
    dense head produces a single sigmoid output.

    Args:
        model_name: NOTE(review): not used in the body - the backbone is selected by
            the module-level BASE_MODEL instead.
        fine_tune: When True, backbone layers between IGNORE_BOTTOM_NLAYERS_TUNE and
            (if non-zero) the last IGNORE_TOP_NLAYERS_TUNE layers are made trainable;
            when False, every backbone layer stays frozen.

    Returns:
        The compiled Keras ``Model`` taking ``[input_1, input_2]``.

    Side effects:
        Prints the name and trainable flag of every backbone layer and the model summary.
    """
    input_1 = Input(shape=INPUT_SHAPE + (3,))
    input_2 = Input(shape=INPUT_SHAPE + (3,))
    
    backbone = build_model(BASE_MODEL)
    # Truncate the backbone: its output is taken from layers[-IGNORE_TOP_NLAYERS_ARCH].
    backbone = Model(backbone.layers[0].input, backbone.layers[-IGNORE_TOP_NLAYERS_ARCH].output)
    # Freeze everything first; the fine-tune branch below selectively unfreezes layers.
    for x in backbone.layers:
        x.trainable = False

    if fine_tune:
        for x in backbone.layers[:IGNORE_BOTTOM_NLAYERS_TUNE]:
            x.trainable = False
        # A zero top count needs its own branch: the slice [a:-0] would be empty.
        if IGNORE_TOP_NLAYERS_TUNE == 0:
            for x in backbone.layers[IGNORE_BOTTOM_NLAYERS_TUNE:]:
                x.trainable = True
        else:
            for x in backbone.layers[IGNORE_BOTTOM_NLAYERS_TUNE:-IGNORE_TOP_NLAYERS_TUNE]:
                x.trainable = True

    for x in backbone.layers:
        print(x.name, x.trainable)
                
    # Same backbone object on both inputs -> shared weights.
    x1 = backbone(input_1)
    x2 = backbone(input_2)

    x1 = GlobalAvgPool2D()(x1)
    x2 = GlobalAvgPool2D()(x2)

    # Normalization without learnable offset/scale (center=False, scale=False).
    x1 = LayerNormalization(axis=-1, epsilon=0.001, center=False, scale=False)(x1)
    x2 = LayerNormalization(axis=-1, epsilon=0.001, center=False, scale=False)(x2)

    # Pairwise features: x3 = (x1 - x2)^2, x4 = x1^2 - x2^2, x5 = x1 * x2.
    x3 = Subtract()([x1, x2])
    x3 = Multiply()([x3, x3])
    x1_ = Multiply()([x1, x1])
    x2_ = Multiply()([x2, x2])
    x4 = Subtract()([x1_, x2_])
    x5 = Multiply()([x1, x2])
    x = Concatenate(axis=-1)([x3, x4, x5])
        
#     x = LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True)(x)
    # Dense head: 32 (relu) -> 128 (relu) -> 32 (tanh), each followed by Dropout(0.05).
    x = Dense(32, activation="relu")(x)
    x = Dropout(0.05)(x)    
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.05)(x)    
    x = Dense(32, activation="tanh")(x)
#     x = LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=False)(x)
    x = Dropout(0.05)(x)    
    # Single sigmoid unit with an L2 penalty on its kernel.
    out = Dense(1, kernel_regularizer=L2(.01), activation="sigmoid")(x)

    model = Model([input_1, input_2], out)
    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.00001))
    model.summary()

    return model


In [8]:
# List the layer names of the truncated backbone (same cut point as in baseline_model)
backbone = build_model(BASE_MODEL)
backbone = Model(
    inputs=backbone.layers[0].input,
    outputs=backbone.layers[-IGNORE_TOP_NLAYERS_ARCH].output,
)
for layer in backbone.layers:
    print(layer.name)

input_1
Conv2d_1a_3x3
Conv2d_1a_3x3_BatchNorm
Conv2d_1a_3x3_Activation
Conv2d_2a_3x3
Conv2d_2a_3x3_BatchNorm
Conv2d_2a_3x3_Activation
Conv2d_2b_3x3
Conv2d_2b_3x3_BatchNorm
Conv2d_2b_3x3_Activation
MaxPool_3a_3x3
Conv2d_3b_1x1
Conv2d_3b_1x1_BatchNorm
Conv2d_3b_1x1_Activation
Conv2d_4a_3x3
Conv2d_4a_3x3_BatchNorm
Conv2d_4a_3x3_Activation
Conv2d_4b_3x3
Conv2d_4b_3x3_BatchNorm
Conv2d_4b_3x3_Activation
Block35_1_Branch_2_Conv2d_0a_1x1
Block35_1_Branch_2_Conv2d_0a_1x1_BatchNorm
Block35_1_Branch_2_Conv2d_0a_1x1_Activation
Block35_1_Branch_1_Conv2d_0a_1x1
Block35_1_Branch_2_Conv2d_0b_3x3
Block35_1_Branch_1_Conv2d_0a_1x1_BatchNorm
Block35_1_Branch_2_Conv2d_0b_3x3_BatchNorm
Block35_1_Branch_1_Conv2d_0a_1x1_Activation
Block35_1_Branch_2_Conv2d_0b_3x3_Activation
Block35_1_Branch_0_Conv2d_1x1
Block35_1_Branch_1_Conv2d_0b_3x3
Block35_1_Branch_2_Conv2d_0c_3x3
Block35_1_Branch_0_Conv2d_1x1_BatchNorm
Block35_1_Branch_1_Conv2d_0b_3x3_BatchNorm
Block35_1_Branch_2_Conv2d_0c_3x3_BatchNorm
Block35_1_Branch_

In [9]:
# Train one model per validation fold; the best checkpoint of fold i (by val_acc)
# is written to {BASE_PATH}/log/model/{MODEL_NAME}_{i}.h5
for i, val_family in enumerate(val_families_list):

    print('##############################')
    print(f'Iteration {i}: Validation on {val_family}')
    print('##############################')

    train, val, train_person_to_images_map, val_person_to_images_map = get_train_val(val_family)
    file_path = f"{BASE_PATH}/log/model/{MODEL_NAME}_{i}.h5"
    # Make sure the checkpoint directory exists, otherwise the first save fails
    # only after a full epoch has already been trained.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    # NOTE(review): patience=30 exceeds epochs=25 below, so this callback cannot
    # trigger with the current settings - confirm whether that is intended.
    reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.3, patience=30, verbose=1)
    callbacks_list = [checkpoint, reduce_on_plateau]
    
    model = baseline_model(BASE_MODEL, fine_tune=FINE_TUNE)
    
    # Both generators are endless, so the epoch length is fixed via
    # steps_per_epoch / validation_steps.
    history = model.fit(gen(train, train_person_to_images_map, INPUT_SHAPE, batch_size=16,
                            normalization=NORMALIZATION), 
                        validation_data=gen(val, val_person_to_images_map, INPUT_SHAPE, batch_size=16,
                                            normalization=NORMALIZATION), 
                        epochs=25, steps_per_epoch=300, validation_steps=200,
                        verbose=1, callbacks=callbacks_list, 
                        use_multiprocessing=False, workers=1)

##############################
Iteration 0: Validation on F09
##############################
input_1 False
Conv2d_1a_3x3 False
Conv2d_1a_3x3_BatchNorm False
Conv2d_1a_3x3_Activation False
Conv2d_2a_3x3 False
Conv2d_2a_3x3_BatchNorm False
Conv2d_2a_3x3_Activation False
Conv2d_2b_3x3 False
Conv2d_2b_3x3_BatchNorm False
Conv2d_2b_3x3_Activation False
MaxPool_3a_3x3 False
Conv2d_3b_1x1 False
Conv2d_3b_1x1_BatchNorm False
Conv2d_3b_1x1_Activation False
Conv2d_4a_3x3 False
Conv2d_4a_3x3_BatchNorm False
Conv2d_4a_3x3_Activation False
Conv2d_4b_3x3 False
Conv2d_4b_3x3_BatchNorm False
Conv2d_4b_3x3_Activation False
Block35_1_Branch_2_Conv2d_0a_1x1 False
Block35_1_Branch_2_Conv2d_0a_1x1_BatchNorm False
Block35_1_Branch_2_Conv2d_0a_1x1_Activation False
Block35_1_Branch_1_Conv2d_0a_1x1 False
Block35_1_Branch_2_Conv2d_0b_3x3 False
Block35_1_Branch_1_Conv2d_0a_1x1_BatchNorm False
Block35_1_Branch_2_Conv2d_0b_3x3_BatchNorm False
Block35_1_Branch_1_Conv2d_0a_1x1_Activation False
Block35_1_Branch_2_Con

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 160, 160, 3) 0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 160, 160, 3) 0                                            
__________________________________________________________________________________________________
model_1 (Functional)            (None, 3, 3, 1792)   22578384    input_2[0][0]                    
                                                                 input_3[0][0]                    
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1792)         0           model_1[0][0]              

2021-08-11 05:40:24.980620: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-08-11 05:40:25.003015: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2300000000 Hz
2021-08-11 05:40:32.401207: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2021-08-11 05:40:32.609550: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7



Epoch 00001: val_acc improved from -inf to 0.51266, saving model to /root/KinshipRecognition/log/model/ensemble_deepface_Facenet_notune_dense32-128-32_drop05_0.h5
Epoch 2/25

Epoch 00002: val_acc improved from 0.51266 to 0.54125, saving model to /root/KinshipRecognition/log/model/ensemble_deepface_Facenet_notune_dense32-128-32_drop05_0.h5
Epoch 3/25

Epoch 00003: val_acc did not improve from 0.54125
Epoch 4/25

Epoch 00004: val_acc did not improve from 0.54125
Epoch 5/25

Epoch 00005: val_acc improved from 0.54125 to 0.57891, saving model to /root/KinshipRecognition/log/model/ensemble_deepface_Facenet_notune_dense32-128-32_drop05_0.h5
Epoch 6/25

Epoch 00006: val_acc did not improve from 0.57891
Epoch 7/25

Epoch 00007: val_acc did not improve from 0.57891
Epoch 8/25

Epoch 00008: val_acc did not improve from 0.57891
Epoch 9/25

Epoch 00009: val_acc did not improve from 0.57891
Epoch 10/25

Epoch 00010: val_acc did not improve from 0.57891
Epoch 11/25

Epoch 00011: val_acc did not imp

KeyboardInterrupt: 

In [None]:
# Ensemble inference: score every test pair with each fold's checkpoint and average
# the fold predictions into the submission score.
test_path = f"{BASE_PATH}/data/test/"
submission = pd.read_csv(f'{BASE_PATH}/data/test_ds.csv')
preds_for_sub = np.zeros(submission.shape[0])
all_preds = list()

# Create the output directory up front so a missing folder is detected before the
# (expensive) prediction loop instead of at the final to_csv calls.
os.makedirs(f"{BASE_PATH}/log/results", exist_ok=True)

for i, val_family in enumerate(val_families_list):

    print('##############################')
    print(f'Iteration {i}: Validation on {val_family}')
    print('##############################')
    
    # Rebuild the architecture and load the weights saved for fold i during training.
    model = baseline_model(BASE_MODEL, fine_tune=FINE_TUNE)
    file_path = f"{BASE_PATH}/log/model/{MODEL_NAME}_{i}.h5"
    model.load_weights(file_path)

    # Predictions, computed in chunks of 32 pairs
    predictions = []
    for j in range(0, len(submission.p1.values), 32):
        X1 = submission.p1.values[j:j+32]
        X1 = np.array([read_img(test_path + x, INPUT_SHAPE, normalization=NORMALIZATION) for x in X1])

        X2 = submission.p2.values[j:j+32]
        X2 = np.array([read_img(test_path + x, INPUT_SHAPE, normalization=NORMALIZATION) for x in X2])

        pred = model.predict([X1, X2]).ravel().tolist()
        predictions += pred    
    
    all_preds.append(np.array(predictions))
    # Running mean over folds: each fold contributes 1 / number_of_folds.
    preds_for_sub += np.array(predictions) / len(val_families_list)

    
all_preds = np.asarray(all_preds)
submission['score'] = preds_for_sub
# One row per fold with the raw per-pair predictions, plus the averaged submission file.
pd.DataFrame(all_preds).to_csv(f"{BASE_PATH}/log/results/{MODEL_NAME}_allpreds.csv", index=False)
submission.to_csv(f"{BASE_PATH}/log/results/{MODEL_NAME}.csv", index=False)

In [None]:
# Number of pairs whose averaged score is at most 0.5, the total number of pairs
# (followed by a blank line), then every score on its own line.
# Fixes: the array is named `preds_for_sub` (the original `pred_for_sub` was undefined),
# and an int cannot be concatenated with '\n'.
print(np.sum(preds_for_sub <= 0.5))
print(f"{len(preds_for_sub)}\n")
for line in preds_for_sub:
    print(line)