# **Kinship Verification**

MOUNT GOOGLE DRIVE

In [None]:
# Colab-only step: attach Google Drive at /gdrive, the root used by the
# dataset, checkpoint and submission paths in the rest of this notebook.
from google.colab import drive

drive.mount("/gdrive")

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


INSTALL LIBS

In [None]:
%%capture
# %pip (instead of !pip) installs into the environment of the running kernel.
# Versions are pinned so a fresh runtime resolves the same packages every time.
%pip install keras_vggface==0.6 keras_applications==1.0.8

In [None]:
from collections import defaultdict
from glob import glob
from random import choice, sample

import tensorflow as tf
import keras
import cv2
from imageio import imread
from skimage.transform import resize
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract, Conv2D, Lambda, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace

TRAIN AND VALIDATION

In [None]:
train_file_path = "/gdrive/MyDrive/Kinship Recognition Starter/train_ds.csv"
train_folders_path = "/gdrive/MyDrive/Kinship Recognition Starter/train/train-faces/"

# All images belonging to families F09** will be used to create the validation set while training the model
# For final submission, you can add these to the training data as well
val_famillies = "F09"


def _person_key(path):
    """Return the "family/member" part of an image path.

    Works both for full paths (".../F0123/MID1/P1.jpg") and for the relative
    entries of the relationship CSV ("F0123/MID1/P1.jpg"): the two path
    components preceding the file name are joined with "/".
    """
    return "/".join(path.split("/")[-3:-1])


all_images = glob(train_folders_path + "*/*/*.jpg")  # every face image on disk
train_images = [x for x in all_images if val_famillies not in x]  # everything except F09*
val_images = [x for x in all_images if val_famillies in x]  # only F09*

# family/member key for every image; kept as a list for backward compatibility
ppl = [_person_key(x) for x in all_images]
# Set view of the same keys so the relationship filter below does O(1)
# lookups instead of scanning the whole list for every CSV row.
known_people = set(ppl)

train_person_to_images_map = defaultdict(list)
for x in train_images:
    train_person_to_images_map[_person_key(x)].append(x)

val_person_to_images_map = defaultdict(list)
for x in val_images:
    val_person_to_images_map[_person_key(x)].append(x)

relationships_df = pd.read_csv(train_file_path)
# Keep only (p1, p2, label) rows whose two people both have images on disk.
# The person key is derived by splitting on "/" rather than slicing the first
# 10 characters, so member folders with two-digit ids (e.g. "MID10") are not
# truncated to a different person ("MID1").
relationships = [
    (p1, p2, label)
    for p1, p2, label in zip(
        relationships_df.p1.values,
        relationships_df.p2.values,
        relationships_df.relationship.values,
    )
    if _person_key(p1) in known_people and _person_key(p2) in known_people
]

# Split relationship tuples by the family of the first person.
train = [x for x in relationships if val_famillies not in x[0]]
val = [x for x in relationships if val_famillies in x[0]]


from keras.preprocessing import image

def read_img(path):
    """Load one image from `path` and return it preprocessed for the VGGFace models.

    The image is loaded at 224x224, converted to a float64 array and passed
    through `preprocess_input(..., version=2)`.
    """
    img = image.load_img(path, target_size=(224, 224))
    # `np.float` was only an alias of the builtin float (float64); it was
    # deprecated in NumPy 1.20 and removed in 1.24, so name the dtype explicitly.
    img = np.array(img).astype(np.float64)
    return preprocess_input(img, version=2)

GENERATOR

In [None]:
####DO THIS GENERATOR #####
import copy
import os
def gen(list_tuples, person_to_images_map, batch_size=16, imgs_per_pair=3):
    """Endlessly yield batches of paired face images for the four-input model.

    Args:
        list_tuples: relationship tuples ``(p1_image, p2_image, label)`` whose
            image entries look like ``'F0123/MID1/P01276_face0.jpg'``.
        person_to_images_map: not used by the sampling logic; kept so existing
            callers keep working.
        batch_size: number of relationship tuples sampled per batch. The number
            of image pairs in a batch can be larger, see `imgs_per_pair`.
        imgs_per_pair: upper bound on how many image pairs are emitted for one
            sampled relationship (default 3, the value previously hard-coded).

    Yields:
        ``([X1, X2, X1a, X2a], labels)`` where X1/X1a hold the images of the
        first person, X2/X2a the images of the second person, and ``labels``
        is an array with one entry per image pair.

    Note:
        Image folders are always listed under the module-level
        `train_folders_path`, for training and validation tuples alike.
    """
    while True:
        batch_tuples = sample(list_tuples, batch_size)  # [('F0123/MID1/P01276_face0.jpg', 'F0644/MID2/P06777_face5.jpg', 0), ...]

        labels = []
        X1 = []
        X2 = []
        for tup in batch_tuples:
            person1 = "/".join(tup[0].split("/")[:2])  # e.g. 'F0123/MID1'
            person2 = "/".join(tup[1].split("/")[:2])
            imgs_person1 = os.listdir(train_folders_path + person1)
            imgs_person2 = os.listdir(train_folders_path + person2)
            # Pair images index by index, limited by the person with fewer images.
            length = min(imgs_per_pair, len(imgs_person1), len(imgs_person2))

            for i in range(length):
                X1.append(person1 + '/' + imgs_person1[i])
                X2.append(person2 + '/' + imgs_person2[i])
                labels.append(tup[2])

        X1 = np.array([read_img(train_folders_path + x) for x in X1])
        X1a = X1.copy()

        X2 = np.array([read_img(train_folders_path + x) for x in X2])
        # The second copy must come from X2: copying X1 here fed the first
        # person's images to both inputs of the second model branch.
        X2a = X2.copy()

        yield [X1, X2, X1a, X2a], np.array(labels)

In [None]:
###FACENET FAILS
# The two triple-quoted strings below hold disabled FaceNet experiments
# (loading the saved model, and stacking a Dense head on top of it).
# They are plain string literals, so none of the code inside them is executed.
'''
from tensorflow.keras.layers import MaxPooling2D
from keras.models import load_model

facenet_model = load_model('facenet_keras.h5')
facenet_model.load_weights('facenet_keras_weights.h5')
for layer in facenet_model.layers[:-3]:
    layer.trainable = True
facenet_model.summary()
'''
'''
new_layer = Dense(10, activation='softmax', name='my_dense')

inp = facenet_model.input
out = new_layer(facenet_model.layers[-1].output)

model2 = Model(inp, out)
model2.summary(line_length=150)
'''
# Disabled experiment: FaceNet + VGGFace (resnet50) fusion model.
# Kept as a string literal for reference only; nothing inside it is executed.
# The string delimiters sit at column 0: an indented delimiter after the
# previous string has closed is an "unexpected indent" error at cell level.
'''
model_path = '/gdrive/MyDrive/facenet_keras.h5'
model_fn = load_model(model_path)
for layer in model_fn.layers[:-3]:
    layer.trainable = True
model_vgg = VGGFace(model='resnet50', include_top=False)
for layer in model_vgg.layers[:-3]:
    layer.trainable = True

def lol():
    input_1 = Input(shape=(IMG_SIZE_FN, IMG_SIZE_FN, 3))
    input_2 = Input(shape=(IMG_SIZE_FN, IMG_SIZE_FN, 3))
    input_3 = Input(shape=(IMG_SIZE_VGG, IMG_SIZE_VGG, 3))
    input_4 = Input(shape=(IMG_SIZE_VGG, IMG_SIZE_VGG, 3))

    x1 = model_fn(input_1)
    x2 = model_fn(input_2)
    x3 = model_vgg(input_3)
    x4 = model_vgg(input_4)

    x1 = Reshape((1, 1 ,128))(x1)
    x2 = Reshape((1, 1 ,128))(x2)
    x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1)])
    x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2)])

    x1t = Lambda(lambda tensor  : K.square(tensor))(x1)
    x2t = Lambda(lambda tensor  : K.square(tensor))(x2)
    x3t = Lambda(lambda tensor  : K.square(tensor))(x3)
    x4t = Lambda(lambda tensor  : K.square(tensor))(x4)

    merged_add_fn = Add()([x1, x2])
    merged_add_vgg = Add()([x3, x4])
    merged_sub1_fn = Subtract()([x1,x2])
    merged_sub1_vgg = Subtract()([x3,x4])
    merged_sub2_fn = Subtract()([x2,x1])
    merged_sub2_vgg = Subtract()([x4,x3])
    merged_mul1_fn = Multiply()([x1,x2])
    merged_mul1_vgg = Multiply()([x3,x4])
    merged_sq1_fn = Add()([x1t,x2t])
    merged_sq1_vgg = Add()([x3t,x4t])
    merged_sqrt_fn = Lambda(lambda tensor  : signed_sqrt(tensor))(merged_mul1_fn)
    merged_sqrt_vgg = Lambda(lambda tensor  : signed_sqrt(tensor))(merged_mul1_vgg)


    merged_add_vgg = Conv2D(128 , [1,1] )(merged_add_vgg)
    merged_sub1_vgg = Conv2D(128 , [1,1] )(merged_sub1_vgg)
    merged_sub2_vgg = Conv2D(128 , [1,1] )(merged_sub2_vgg)
    merged_mul1_vgg = Conv2D(128 , [1,1] )(merged_mul1_vgg)
    merged_sq1_vgg = Conv2D(128 , [1,1] )(merged_sq1_vgg)
    merged_sqrt_vgg = Conv2D(128 , [1,1] )(merged_sqrt_vgg)

    merged = Concatenate(axis=-1)([Flatten()(merged_add_vgg), (merged_add_fn), Flatten()(merged_sub1_vgg), (merged_sub1_fn),
                                   Flatten()(merged_sub2_vgg), (merged_sub2_fn), Flatten()(merged_mul1_vgg), (merged_mul1_fn),
                                   Flatten()(merged_sq1_vgg), (merged_sq1_fn), Flatten()(merged_sqrt_vgg), (merged_sqrt_fn)])

    merged = Dense(100, activation="relu")(merged)
    merged = Dropout(0.1)(merged)
    merged = Dense(25, activation="relu")(merged)
    merged = Dropout(0.1)(merged)
    out = Dense(1, activation="sigmoid")(merged)

    model = Model([input_1, input_2, input_3, input_4], out)

    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.00001))

    model.summary()

    return model
'''

# Disabled helper referenced by the experiment string above (signed square root).
'''
def signed_sqrt(x):
    return K.sign(x)*K.sqrt(K.abs(x)+1e-9)
'''

# Disabled draft of a FaceNet-based `baseline_model`, kept as a string literal
# for reference; it is not executed. The active `baseline_model` is the
# function defined in the MODEL ARCHITECTURE cell.
'''
def baseline_model():
    #FACENET
    facenet_model = load_model('/gdrive/MyDrive/facenet_keras.h5')
    for layer in facenet_model.layers[:-3]:
        layer.trainable = True
    #input
    fc_input_1 = Input(shape=(160, 160, 3))        
    fc_input_2 = Input(shape=(160, 160, 3))        
    #starting model
    fn_x1 = facenet_model(fc_input_1)
    fn_x2 = facenet_model(fc_input_2)
    #reshaping image array for global max pool layer
    fn_x1 = Reshape((1, 1 ,128))(fn_x1) 
    fn_x2 = Reshape((1, 1 ,128))(fn_x2)
    #combining inputs
    fn_x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(fn_x1), GlobalAvgPool2D()(fn_x1)])
    fn_x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(fn_x2), GlobalAvgPool2D()(fn_x2)])
    #adding potential features, concat to final layer before dense
    fn_add = Add()([fn_x1, fn_x2])
    fn_product = Multiply()([fn_x1,fn_x2])
    fn_x = Concatenate(axis=-1)([fn_add, fn_product])
'''


MODEL ARCHITECTURE

In [None]:
from tensorflow.keras.layers import Flatten, Add, BatchNormalization
from keras.models import load_model
from keras import backend as K

def baseline_model():
    """Build and compile the two-branch kinship classifier.

    One branch runs both faces through a VGGFace 'vgg16' backbone, the other
    through a VGGFace 'resnet50' backbone. Each branch pools its feature maps
    (global max + global average), derives pairwise comparison features, and
    the two branches are concatenated into a small dense head with a single
    sigmoid output.

    Returns:
        A compiled Keras `Model` whose inputs are, in order,
        ``[vgg_input_1, vgg_input_2, res_input_1, res_input_2]`` (each
        224x224x3). It is compiled with binary cross-entropy, the 'acc'
        metric and Adam(1e-5). The model summary is printed as a side effect.
    """

    ###VGG###
    vgg_model = VGGFace(model='vgg16', include_top=False)
    # NOTE(review): this marks all but the last three layers as trainable.
    # Keras layers are presumably trainable already, so this likely changes
    # nothing; confirm whether freezing (trainable = False) was intended.
    for x in vgg_model.layers[:-3]:
        x.trainable = True
    vgg_input_1 = Input(shape=(224, 224, 3))
    vgg_input_2 = Input(shape=(224, 224, 3))
    vgg_x1 = vgg_model(vgg_input_1)  # the same backbone is shared by both faces
    vgg_x2 = vgg_model(vgg_input_2)
    # pool the feature maps into one vector per face
    vgg_x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(vgg_x1), GlobalAvgPool2D()(vgg_x1)])
    vgg_x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(vgg_x2), GlobalAvgPool2D()(vgg_x2)])
    # pairwise features
    vgg_x3 = Subtract()([vgg_x1, vgg_x2])  # x1 - x2
    vgg_x3 = Multiply()([vgg_x3, vgg_x3])  # (x1 - x2)^2
    vgg_x = Multiply()([vgg_x1, vgg_x2])  # x1 * x2
    vgg_x = Concatenate(axis=-1)([vgg_x, vgg_x3])  # [x1 * x2, (x1 - x2)^2]

    ###RESNET###
    res_model = VGGFace(model='resnet50', include_top=False)
    # NOTE(review): same remark as for the VGG backbone above.
    for x in res_model.layers[:-3]:
        x.trainable = True
    res_input_1 = Input(shape=(224, 224, 3))
    res_input_2 = Input(shape=(224, 224, 3))
    # A Keras layer has to be instantiated first and then called on the
    # tensor. The normalized tensors get their own names so that
    # `res_input_1/2` remain the real Input tensors handed to `Model` below.
    res_norm_1 = BatchNormalization()(res_input_1)
    res_norm_2 = BatchNormalization()(res_input_2)
    res_x1 = res_model(res_norm_1)  # the same backbone is shared by both faces
    res_x2 = res_model(res_norm_2)
    # pool the feature maps into one vector per face
    res_x1 = Concatenate(axis=-1)([GlobalMaxPool2D()(res_x1), GlobalAvgPool2D()(res_x1)])
    res_x2 = Concatenate(axis=-1)([GlobalMaxPool2D()(res_x2), GlobalAvgPool2D()(res_x2)])
    # pairwise features
    # x1 - x2 (subtracting x2 from itself made this feature constantly zero)
    res_x3 = Subtract()([res_x1, res_x2])
    res_x3 = Multiply()([res_x3, res_x3])  # (x1 - x2)^2
    res_x1_ = Multiply()([res_x1, res_x1])  # x1^2
    res_x2_ = Multiply()([res_x2, res_x2])  # x2^2
    res_x4 = Subtract()([res_x1_, res_x2_])  # x1^2 - x2^2
    res_x = Concatenate(axis=-1)([res_x4, res_x3])

    #MERGE RESNET AND VGG
    merged = Concatenate(axis=-1)([vgg_x, res_x])
    merged = Dense(100, activation="relu")(merged)
    merged = Dropout(0.01)(merged)
    out = Dense(1, activation="sigmoid")(merged)

    model = Model([vgg_input_1, vgg_input_2, res_input_1, res_input_2], out)
    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.00001))

    model.summary()

    return model

MODEL AND CHECKPOINTS

In [None]:
# Checkpointing: whenever 'val_acc' reaches a new maximum, the model weights
# are written to Drive so training can be resumed or reused later.
# ReduceLROnPlateau multiplies the learning rate by 0.1 once 'val_acc' has not
# improved for 20 epochs.
file_path = "/gdrive/MyDrive/vgg_face2.h5"
checkpoint = ModelCheckpoint(
    file_path,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
reduce_on_plateau = ReduceLROnPlateau(
    monitor="val_acc",
    mode="max",
    factor=0.1,
    patience=20,
    verbose=1,
)
callbacks_list = [checkpoint, reduce_on_plateau]
model = baseline_model()

FIT MODEL

In [None]:
# 25 epochs of 100 generator batches each; after every epoch the model is
# evaluated on 50 batches drawn from the validation generator.
model.fit(
    gen(train, train_person_to_images_map, batch_size=16),
    validation_data=gen(val, val_person_to_images_map, batch_size=16),
    steps_per_epoch=100,
    validation_steps=50,
    epochs=25,
    callbacks=callbacks_list,
    workers=1,
    use_multiprocessing=False,
    verbose=1,
)


PREDICTIONS

In [None]:
# Modify paths as per your need
test_path = "/gdrive/MyDrive/Kinship Recognition Starter/test/"

# To predict from saved weights instead of the model trained above
# (the checkpoint callback writes to vgg_face2.h5):
#model = baseline_model()
#model.load_weights("/gdrive/MyDrive/vgg_face2.h5")

submission = pd.read_csv('/gdrive/MyDrive/Kinship Recognition Starter/test_ds.csv')
predictions = []

# Predict in chunks of 32 pairs so only one chunk of images is in memory at a time.
for i in range(0, len(submission.p1.values), 32):
    if i%64 == 0:
      print(i)  # coarse progress indicator
    X1 = np.array([read_img(test_path + x) for x in submission.p1.values[i:i+32]])
    X2 = np.array([read_img(test_path + x) for x in submission.p2.values[i:i+32]])

    # The second pair of model inputs takes the very same images, so reuse the
    # arrays instead of reading and preprocessing every file a second time.
    X1a = X1
    X2a = X2

    pred = model.predict([X1, X2, X1a, X2a]).ravel().tolist()
    predictions += pred

CREATE CSV TO SUBMIT

In [None]:
# Size the index from the predictions instead of a hard-coded 3000, so the
# frame can be built whatever the number of rows in the test CSV.
d = {'index': np.arange(len(predictions)), 'label': predictions}
submissionfile = pd.DataFrame(data=d)
# Round the sigmoid outputs to hard 0/1 labels before writing them as integers.
submissionfile = submissionfile.round()
submissionfile.astype("int64").to_csv("/gdrive/MyDrive/tojo2.csv", index=False)