# Kinship Verification

In [1]:
from glob import glob
import pandas as pd
import numpy as np
from random import choice, sample
from keras.utils.vis_utils import plot_model

from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D, Concatenate, Multiply, Dropout, Subtract
from keras.models import Model
from keras.optimizers import Adam
from keras_vggface.utils import preprocess_input
from keras_vggface.vggface import VGGFace
import cv2


In [2]:
# Locations of the relationship table and of the training image tree.
# NOTE: the backslash separators make these paths Windows-specific; the
# cells that derive person ids from image paths also split on "\\".
train_file_path = "input\\train_relationships.csv"
train_folders_path = "input\\train\\"

In [3]:
# Collect every .jpg three directory levels below the training folder
# (e.g. F0002\MID1\P00009_face3.jpg).
# glob returns matches in arbitrary, filesystem-dependent order, so sort them
# to keep the image list (and everything derived from it) stable across runs.
all_images = sorted(glob(train_folders_path + "*/*/*.jpg"))


In [4]:
# Sanity check: how many images were found, and a peek at the first five.
print(len(all_images), all_images[:5], sep="\n")

12379
['input\\train\\F0002\\MID1\\P00009_face3.jpg', 'input\\train\\F0002\\MID1\\P00010_face4.jpg', 'input\\train\\F0002\\MID1\\P00011_face1.jpg', 'input\\train\\F0002\\MID1\\P00012_face2.jpg', 'input\\train\\F0002\\MID1\\P00013_face2.jpg']


In [5]:
# Images whose family folder name contains this tag are held out for validation.
val_famillies = "F09"

# Match the tag against the family folder only (third path component from the
# end) instead of the whole path, so a parent directory or file name that
# happens to contain the tag cannot push images into the wrong split.
train_images, val_images = [], []
for image_path in all_images:
    family_folder = image_path.split("\\")[-3]
    if val_famillies in family_folder:
        val_images.append(image_path)
    else:
        train_images.append(image_path)

In [6]:
# Size of each image split: training first, then validation.
for split_images in (train_images, val_images):
    print(len(split_images))

11232
1147


In [7]:
from collections import defaultdict
# Person id -> list of that person's training image paths.
# Created empty here; a later cell fills it from train_images.
train_person_to_images_map = defaultdict(list)

In [8]:
# Sanity check: the map has been created but holds no entries yet.
print(train_person_to_images_map)

defaultdict(<class 'list'>, {})


In [9]:
# Group the training image paths by person, keyed as "<family>\<member>"
# (the two folder names directly above the image file).
# The map is created in a different cell, so empty it first: without this,
# re-running the cell would append every image a second time.
train_person_to_images_map.clear()
for image_path in train_images:
    path_parts = image_path.split("\\")
    train_person_to_images_map[path_parts[-3] + "\\" + path_parts[-2]].append(image_path)

In [10]:
# Number of distinct people in the training split, then one example entry.
print(len(train_person_to_images_map))
first_person, first_person_images = list(train_person_to_images_map.items())[0]
print((first_person, first_person_images))

2085
('F0002\\MID1', ['input\\train\\F0002\\MID1\\P00009_face3.jpg', 'input\\train\\F0002\\MID1\\P00010_face4.jpg', 'input\\train\\F0002\\MID1\\P00011_face1.jpg', 'input\\train\\F0002\\MID1\\P00012_face2.jpg', 'input\\train\\F0002\\MID1\\P00013_face2.jpg', 'input\\train\\F0002\\MID1\\P00014_face2.jpg', 'input\\train\\F0002\\MID1\\P00015_face2.jpg', 'input\\train\\F0002\\MID1\\P00016_face2.jpg', 'input\\train\\F0002\\MID1\\P00017_face3.jpg', 'input\\train\\F0002\\MID1\\P00018_face1.jpg'])


In [11]:
# Person id -> list of that person's validation image paths, keyed by the
# two folder names directly above each image file.
val_person_to_images_map = defaultdict(list)

for image_path in val_images:
    path_parts = image_path.split("\\")
    person_id = "\\".join((path_parts[-3], path_parts[-2]))
    val_person_to_images_map[person_id].append(image_path)

In [12]:
# Number of distinct people in the validation split, then one example entry.
print(len(val_person_to_images_map))
sample_person, sample_person_images = list(val_person_to_images_map.items())[0]
print((sample_person, sample_person_images))

231
('F0900\\MID1', ['input\\train\\F0900\\MID1\\P09505_face1.jpg', 'input\\train\\F0900\\MID1\\P09506_face1.jpg', 'input\\train\\F0900\\MID1\\P09508_face1.jpg', 'input\\train\\F0900\\MID1\\P09509_face1.jpg', 'input\\train\\F0900\\MID1\\P09513_face1.jpg'])


In [13]:
# One person id per image, in image order (ids therefore repeat).
ppl = []
for image_path in all_images:
    path_parts = image_path.split("\\")
    ppl.append(path_parts[-3] + "\\" + path_parts[-2])
print(len(ppl), ppl[0], sep="\n")

12379
F0002\MID1


In [14]:
# Load the relationship table; its p1 and p2 columns are read in the next cell.
relationships = pd.read_csv(train_file_path)

In [15]:
# Turn the relationship table into (p1, p2) tuples and keep only the pairs
# where both people have at least one image on disk.
# `ppl` holds one entry per image, so test membership against a set instead
# of scanning that list twice for every pair.
known_people = set(ppl)
relationships = list(zip(relationships.p1.values, relationships.p2.values))
relationships = [pair for pair in relationships if pair[0] in known_people and pair[1] in known_people]


In [16]:
# How many usable pairs remain, and what the first few look like.
print(len(relationships), relationships[:4], sep="\n")

3362
[('F0002\\MID1', 'F0002\\MID3'), ('F0002\\MID2', 'F0002\\MID3'), ('F0005\\MID1', 'F0005\\MID2'), ('F0005\\MID3', 'F0005\\MID2')]


In [17]:
# Split the related pairs: a pair goes to validation when the id of its
# first person contains the held-out family tag, otherwise to training.
train, val = [], []
for pair in relationships:
    if val_famillies in pair[0]:
        val.append(pair)
    else:
        train.append(pair)

In [18]:
# Pair counts for the training and validation splits, then a sample of training pairs.
print(len(train), len(val), train[:3], sep="\n")

3066
296
[('F0002\\MID1', 'F0002\\MID3'), ('F0002\\MID2', 'F0002\\MID3'), ('F0005\\MID1', 'F0005\\MID2')]


In [19]:
def read_img(path):
    """Load an image from disk and apply the VGGFace (version 2) preprocessing.

    Args:
        path: file path of the image to read.

    Returns:
        The result of ``preprocess_input(img, version=2)`` applied to the
        image converted to ``float64``.

    Raises:
        OSError: if OpenCV cannot read the file. ``cv2.imread`` signals
            failure by returning ``None`` rather than raising, which would
            otherwise surface later as an obscure error on a NaN scalar array.
    """
    img = cv2.imread(path)
    if img is None:
        raise OSError("cv2.imread could not read image: %r" % (path,))
    # NOTE(review): cv2.imread yields BGR channel order; confirm this is the
    # order preprocess_input(version=2) expects before changing anything here.
    img = np.array(img).astype(np.float64)
    return preprocess_input(img, version=2)


def gen(list_tuples, person_to_images_map, batch_size=16):
    """Endlessly yield balanced batches of image pairs with kinship labels.

    Each batch starts with ``batch_size // 2`` pairs sampled from
    ``list_tuples`` (label 1) and is filled up to ``batch_size`` with random
    pairs of distinct people that are not listed in ``list_tuples`` in either
    order (label 0). One image is picked at random for every person.

    Args:
        list_tuples: sequence of ``(person_a, person_b)`` related pairs. It
            must hold at least ``batch_size // 2`` pairs, because they are
            drawn with ``random.sample``.
        person_to_images_map: mapping from person id to a list of image paths.
        batch_size: number of pairs per yielded batch.

    Yields:
        ``([X1, X2], labels)`` where ``X1`` / ``X2`` stack the ``read_img``
        results for the first / second person of every pair and ``labels`` is
        a float32 array of shape ``(batch_size, 1)``.

    Raises:
        ValueError: if a person in the batch has no images to choose from.
    """
    ppl = list(person_to_images_map.keys())  # everyone we can draw negatives from
    # Set lookups keep negative-pair rejection O(1); scanning the pair list
    # twice per candidate was the dominant cost of building a batch.
    related = {tuple(pair) for pair in list_tuples}
    n_positive = batch_size // 2

    while True:
        batch_tuples = sample(list_tuples, n_positive)
        labels = [1] * len(batch_tuples)  # positive samples
        while len(batch_tuples) < batch_size:
            p1 = choice(ppl)
            p2 = choice(ppl)

            if p1 != p2 and (p1, p2) not in related and (p2, p1) not in related:
                batch_tuples.append((p1, p2))
                labels.append(0)  # negative samples

        # Fail with a clear message instead of an IndexError from choice([]);
        # .get() also avoids inserting empty entries into a defaultdict.
        for pair in batch_tuples:
            for person in pair:
                if not person_to_images_map.get(person):
                    raise ValueError("no images available for person %r" % (person,))

        X1 = [choice(person_to_images_map[pair[0]]) for pair in batch_tuples]
        X1 = np.array([read_img(path) for path in X1])

        X2 = [choice(person_to_images_map[pair[1]]) for pair in batch_tuples]
        X2 = np.array([read_img(path) for path in X2])

        labels = np.asarray(labels).astype('float32').reshape((-1, 1))
        yield [X1, X2], labels


In [1]:
# Model

def baseline_model():
    """Build, compile and return the two-input kinship classifier.

    Both 224x224x3 inputs are passed through the same VGGFace ResNet50
    instance. Each feature map is reduced to a vector by concatenating its
    global max pool and global average pool. The two vectors are combined
    into their element-wise product and squared difference, which feed a
    small dense head ending in a single sigmoid unit.

    Side effects: prints the model summary and writes ``model_plot.png``.

    Returns:
        The compiled Keras ``Model`` (binary cross-entropy, Adam at 1e-4,
        accuracy reported as ``acc``).
    """
    left_image = Input(shape=(224, 224, 3))
    right_image = Input(shape=(224, 224, 3))

    backbone = VGGFace(model='resnet50', include_top=False)

    # Mark every backbone layer except the last three as trainable.
    for layer in backbone.layers[:-3]:
        layer.trainable = True

    def pooled_embedding(feature_map):
        # Concatenate global max pooling and global average pooling.
        merge = Concatenate(axis=-1)
        return merge([GlobalMaxPool2D()(feature_map), GlobalAvgPool2D()(feature_map)])

    # The same backbone instance is applied to both inputs.
    left_features = backbone(left_image)
    right_features = backbone(right_image)

    left_embedding = pooled_embedding(left_features)
    right_embedding = pooled_embedding(right_features)

    # Squared element-wise difference of the two embeddings.
    difference = Subtract()([left_embedding, right_embedding])
    squared_difference = Multiply()([difference, difference])

    # Element-wise product of the two embeddings.
    product = Multiply()([left_embedding, right_embedding])

    combined = Concatenate(axis=-1)([product, squared_difference])

    hidden = Dense(100, activation="relu")(combined)
    hidden = Dropout(0.01)(hidden)
    out = Dense(1, activation="sigmoid")(hidden)

    model = Model([left_image, right_image], out)

    model.compile(loss="binary_crossentropy", metrics=['acc'], optimizer=Adam(0.0001))

    model.summary()

    plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
    return model

In [21]:
# Where the best weights seen during training are written.
file_path = "vgg_face.h5"

# Select the "best" weights on validation accuracy, not training accuracy:
# monitoring 'acc' would keep whichever epoch fit the training batches best,
# i.e. favour the most overfit model. The model is compiled with
# metrics=['acc'], so the validation metric is logged as 'val_acc' -- the
# same quantity the learning-rate schedule below already watches.
checkpoint = ModelCheckpoint(file_path, monitor='val_acc', verbose=1, save_best_only=True, mode='max')

# Cut the learning rate by 10x after 3 epochs without validation improvement.
reduce_on_plateau = ReduceLROnPlateau(monitor="val_acc", mode="max", factor=0.1, patience=3, verbose=1)

callbacks_list = [checkpoint, reduce_on_plateau]

model = baseline_model()
# Uncomment to start from previously saved weights:
# model.load_weights(file_path)
model.fit(gen(train, train_person_to_images_map, batch_size=16),
                    validation_data=gen(val, val_person_to_images_map, batch_size=16), epochs=30, verbose=1,
                    callbacks=callbacks_list, steps_per_epoch=50, validation_steps=10)

# Folder holding the images referenced by the submission file.
test_path = "input/test/"


def chunker(seq, size=32):
    """Lazily split ``seq`` into consecutive slices of at most ``size`` items.

    The last slice is shorter when ``len(seq)`` is not a multiple of ``size``;
    an empty ``seq`` produces no slices.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)


from tqdm import tqdm

# Score every image pair listed in the sample submission and save the result.
submission = pd.read_csv('sample_submission.csv')

predict_batch_size = 32
image_pairs = submission.img_pair.values
# chunker() returns a generator, which has no len(); pass the batch count
# (ceiling division) to tqdm so the progress bar shows a total and an ETA.
n_batches = -(-len(image_pairs) // predict_batch_size)

predictions = []

for batch in tqdm(chunker(image_pairs, size=predict_batch_size), total=n_batches):
    # Each entry is split on "-": the first part names the first image,
    # the second part the second image.
    X1 = np.array([read_img(test_path + pair.split("-")[0]) for pair in batch])
    X2 = np.array([read_img(test_path + pair.split("-")[1]) for pair in batch])

    predictions.extend(model.predict([X1, X2]).ravel().tolist())

submission['is_related'] = predictions

submission.to_csv("vgg_face.csv", index=False)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 input_2 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 vggface_resnet50 (Functional)  (None, None, None,   23561152    ['input_1[0][0]',                
                                2048)                             'input_2[0][0]']            

KeyboardInterrupt: 