In [None]:
# Colab only: expose Google Drive under /gdrive so the dataset and the saved
# weights referenced by the later cells are reachable as ordinary files.
from google.colab import drive

drive.mount(mountpoint='/gdrive')

In [3]:
%%capture
!pip install keras_vggface
!pip install keras_applications
#!pip install numpy==1.19.5

In [4]:
from collections import defaultdict
from glob import glob
from random import choice, sample

import tensorflow as tf
import keras
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract, Average, Reshape, Lambda, Add, Conv2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import Adamax
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
from keras.models import load_model
from keras import backend as K

In [5]:
# Images whose path contains this family prefix (families F09**) are held out
# as the validation set during training. For the final submission they can be
# folded back into the training data.
val_famillies = "F09"

# Dataset locations on the mounted drive -- adjust to wherever you saved them.
train_folders_path = "/gdrive/MyDrive/Kinship Recognition Starter/train/train-faces/"
train_file_path = "/gdrive/MyDrive/Kinship Recognition Starter/train_ds.csv"

In [6]:
# Every face image lives at <train_folders_path>/<family>/<member>/<file>.jpg
all_images = glob(train_folders_path + "*/*/*.jpg")


def _person_key(image_path):
    """Return the "<family>/<member>" key taken from an image path."""
    parts = image_path.split("/")
    return parts[-3] + "/" + parts[-2]


def _group_by_person(image_paths):
    """Map each "<family>/<member>" key to the list of that person's image paths."""
    grouped = defaultdict(list)
    for image_path in image_paths:
        grouped[_person_key(image_path)].append(image_path)
    return grouped


# Split on the validation-family marker: paths containing it go to validation.
train_images, val_images = [], []
for image_path in all_images:
    bucket = val_images if val_famillies in image_path else train_images
    bucket.append(image_path)

# One "<family>/<member>" entry per image (duplicates kept), train and val alike.
ppl = [_person_key(image_path) for image_path in all_images]

train_person_to_images_map = _group_by_person(train_images)
val_person_to_images_map = _group_by_person(val_images)

In [7]:
# Load the labelled pairs: columns p1 / p2 are image paths relative to
# train_folders_path, `relationship` is the label used for training.
relationships_df = pd.read_csv(train_file_path)

# Set membership is O(1); `ppl` holds one "<family>/<member>" entry per image.
known_people = set(ppl)


def _person_of(relative_path):
    """Return the "<family>/<member>" prefix of a CSV path entry.

    Uses the first two path components instead of a fixed 10-character slice,
    so member ids with more than one digit (e.g. "MID10") are not truncated.
    """
    return "/".join(relative_path.split("/")[:2])


# Keep only pairs where both people have at least one image on disk.
relationships = [
    (p1, p2, label)
    for p1, p2, label in zip(
        relationships_df.p1.values,
        relationships_df.p2.values,
        relationships_df.relationship.values,
    )
    if _person_of(p1) in known_people and _person_of(p2) in known_people
]

# Pairs whose first image belongs to a validation family form the validation split.
train = [x for x in relationships if val_famillies not in x[0]]
val = [x for x in relationships if val_famillies in x[0]]

In [8]:
from keras.preprocessing import image
#For VGGFace we need 224 x 224
def read_img(path):
    """Load an image for the VGGFace branch.

    Args:
        path: filesystem path of the image to load.

    Returns:
        The image resized to 224 x 224, converted to a float64 array and
        passed through keras_vggface's `preprocess_input(..., version=2)`.
    """
    img = image.load_img(path, target_size=(224, 224))
    # `np.float` was a deprecated alias of the builtin float (i.e. float64)
    # and was removed in NumPy 1.24; use the explicit dtype instead.
    img = np.array(img).astype(np.float64)
    return preprocess_input(img, version=2)
#For FaceNet we need 160 x 160
def read_img2(path):
    """Load an image for the FaceNet branch.

    Args:
        path: filesystem path of the image to load.

    Returns:
        The image resized to 160 x 160, converted to a float64 array and
        passed through keras_vggface's `preprocess_input(..., version=2)`.
    """
    img = image.load_img(path, target_size=(160, 160))
    # `np.float` was a deprecated alias of the builtin float (i.e. float64)
    # and was removed in NumPy 1.24; use the explicit dtype instead.
    img = np.array(img).astype(np.float64)
    # NOTE(review): this reuses the VGGFace preprocessing for FaceNet inputs;
    # confirm that is the normalisation the FaceNet weights expect.
    return preprocess_input(img, version=2)

In [9]:
def gen(list_tuples, person_to_images_map, batch_size=16):
    """Endlessly yield random training batches for the four-input model.

    Args:
        list_tuples: sequence of (p1, p2, label) tuples, where p1 / p2 are
            image paths relative to `train_folders_path`.
        person_to_images_map: not used by the generator; kept so existing
            callers that pass it keep working.
        batch_size: number of pairs drawn (without replacement) per batch.

    Yields:
        ([X1_1, X2_1, X1_2, X2_2], labels) where X*_1 are the 224 x 224
        VGGFace inputs, X*_2 the 160 x 160 FaceNet inputs of the same images,
        and labels is an array holding the third element of each tuple.

    Raises:
        ValueError: from `random.sample` if `batch_size` exceeds
            `len(list_tuples)`.
    """
    while True:
        batch_tuples = sample(list_tuples, batch_size)

        # All the samples are taken from train_ds.csv, labels are in the labels column
        labels = np.array([tup[2] for tup in batch_tuples])

        # Keep the path lists separate from the loaded arrays: each image is
        # read twice, once per network resolution.
        p1_paths = [train_folders_path + tup[0] for tup in batch_tuples]
        p2_paths = [train_folders_path + tup[1] for tup in batch_tuples]

        # VGGFace inputs (224 x 224)
        X1_1 = np.array([read_img(path) for path in p1_paths])
        X2_1 = np.array([read_img(path) for path in p2_paths])

        # FaceNet inputs (160 x 160), hence read_img2
        X1_2 = np.array([read_img2(path) for path in p1_paths])
        X2_2 = np.array([read_img2(path) for path in p2_paths])

        yield [X1_1, X2_1, X1_2, X2_2], labels

In [10]:
# Turn off TensorFlow eager execution before any model is built.
# NOTE(review): presumably required for the legacy keras / keras_vggface
# models used below -- confirm it is still needed with the installed TF version.
from tensorflow.python.framework.ops import disable_eager_execution 
disable_eager_execution()

def baseline_model(facenet_path='/gdrive/MyDrive/Kinship Recognition Starter/facenet_keras.h5',
                   learning_rate=0.0001):
    """Build and compile the four-input kinship classifier.

    Each face of a pair is embedded twice: by VGGFace (ResNet50, no top) on
    a 224 x 224 input and by a FaceNet model on a 160 x 160 input. The
    per-person features are combined with difference, sum and product, then
    classified by a small dense head with a sigmoid output.

    Args:
        facenet_path: path of the saved Keras FaceNet model to load.
        learning_rate: learning rate passed to the Adamax optimizer.

    Returns:
        A compiled `Model` taking
        [vgg_face_p1, vgg_face_p2, facenet_p1, facenet_p2] and producing one
        sigmoid score per pair. The model summary is printed as a side effect.
    """
    # 4 inputs - 2 for VGG Face and 2 for FaceNet

    #VGGFace input
    input_1 = Input(shape=(224, 224, 3))
    input_2 = Input(shape=(224, 224, 3))
    #FaceNet input
    input_3 = Input(shape=(160, 160, 3))
    input_4 = Input(shape=(160, 160, 3))

    #loading VGGFace and FaceNet models
    base_model = VGGFace(model='resnet50', include_top=False)
    base_model2 = load_model(facenet_path)

    # Explicitly mark every layer except the last three of each backbone as
    # trainable; the last three are left at whatever state they were loaded with.
    for layer in base_model.layers[:-3]:
        layer.trainable = True

    for layer in base_model2.layers[:-3]:
        layer.trainable = True

    #creating embedding based on both models
    x1 = base_model(input_1)
    x2 = base_model(input_2)
    # The FaceNet output is reshaped to a 1 x 1 x 128 map so it can go through
    # the same 2D global pooling layers as the VGGFace feature maps.
    x3 = Reshape((1, 1 ,128))(base_model2(input_3))
    x4 = Reshape((1, 1 ,128))(base_model2(input_4))

    #Applying Various Operations
    # Note: on a 1 x 1 map max- and average-pooling return the same values, so
    # the FaceNet embedding effectively appears twice in x5 / x6.
    x5 = Concatenate(axis=-1)([GlobalMaxPool2D()(x1), GlobalAvgPool2D()(x1), GlobalMaxPool2D()(x3), GlobalAvgPool2D()(x3)])
    x6 = Concatenate(axis=-1)([GlobalMaxPool2D()(x2), GlobalAvgPool2D()(x2), GlobalMaxPool2D()(x4), GlobalAvgPool2D()(x4)])
    x7 = Subtract()([x5, x6])
    x8 = Add()([x5, x6])
    x9 = Multiply()([x5, x6])

    #Concatenating
    x = Concatenate(axis=-1)([x7, x8, x9])

    #Dense Layers + Dropout Regularization
    x = Dense(250, activation="relu")(x)
    x = Dropout(0.2)(x)
    x = Dense(100, activation="relu")(x)
    x = Dropout(0.2)(x)
    out = Dense(1, activation="sigmoid")(x)

    model = Model([input_1, input_2, input_3, input_4], out)

    # 'acc' is the metric name the training callbacks monitor as 'val_acc'.
    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adamax(learning_rate))

    model.summary()

    return model


In [None]:
# Where the best weights found during training are written.
file_path = "/gdrive/MyDrive/vgg_face.h5"

# Save weights only, and only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    file_path,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Cut the learning rate by 10x after 20 epochs without a val_acc improvement.
reduce_on_plateau = ReduceLROnPlateau(
    monitor="val_acc",
    mode="max",
    factor=0.1,
    patience=20,
    verbose=1,
)

callbacks_list = [checkpoint, reduce_on_plateau]

model = baseline_model()

# Both generators are endless, so epoch length is set by the step counts below.
train_batches = gen(train, train_person_to_images_map, batch_size=16)
val_batches = gen(val, val_person_to_images_map, batch_size=16)

model.fit(
    train_batches,
    validation_data=val_batches,
    epochs=25,
    steps_per_epoch=100,
    validation_steps=50,
    callbacks=callbacks_list,
    use_multiprocessing=False,
    workers=1,
    verbose=1,
)

In [None]:
# Modify paths as per your need
test_path = "/gdrive/MyDrive/Kinship Recognition Starter/test/"

# Rebuild the architecture and restore the best checkpoint from training.
model = baseline_model()
model.load_weights("/gdrive/MyDrive/vgg_face.h5")

submission = pd.read_csv('/gdrive/MyDrive/Kinship Recognition Starter/test_ds.csv')
predictions = []

# Score the test pairs in chunks of 32 to bound memory use.
for i in range(0, len(submission.p1.values), 32):
    p1_paths = [test_path + x for x in submission.p1.values[i:i+32]]
    p2_paths = [test_path + x for x in submission.p2.values[i:i+32]]

    # The model has four inputs, in the order
    # [VGGFace p1, VGGFace p2, FaceNet p1, FaceNet p2], so every image is
    # loaded at both resolutions (224 x 224 and 160 x 160).
    X1_1 = np.array([read_img(path) for path in p1_paths])
    X2_1 = np.array([read_img(path) for path in p2_paths])
    X1_2 = np.array([read_img2(path) for path in p1_paths])
    X2_2 = np.array([read_img2(path) for path in p2_paths])

    pred = model.predict([X1_1, X2_1, X1_2, X2_2]).ravel().tolist()
    predictions += pred

In [None]:
# One row per prediction. The index is sized from the predictions themselves
# rather than a hard-coded 3000, so a test set of any length works.
d = {'index': np.arange(len(predictions)), 'label': predictions}
submissionfile = pd.DataFrame(data=d)
# Round the sigmoid scores to the nearest integer to get 0/1 labels.
submissionfile = submissionfile.round()

In [None]:
# Cast the rounded labels to integers and write the submission without the
# DataFrame's own row index.
submission_csv_path = "/gdrive/MyDrive/Kinship Recognition Starter/sv2637.csv"
submissionfile.astype("int64").to_csv(submission_csv_path, index=False)