This kernel is a clone of this nice kernel: https://www.kaggle.com/suicaokhoailang/facenet-baseline-in-keras-0-749-lb. It just adds a pretrained VGGFace model and takes the average of the predictions of the two models.

In [18]:
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
from imageio import imread
from skimage.transform import resize
from scipy.spatial import distance
from keras.models import load_model
import pandas as pd
from tqdm import tqdm

In [19]:
# Training relationships table; it is not referenced again in the visible cells.
train_df = pd.read_csv("../input/recognizing-faces-in-the-wild/train_relationships.csv")
# The sample submission supplies the test pairs (`img_pair` column) and is reused as the working frame for scoring.
test_df = pd.read_csv("../input/recognizing-faces-in-the-wild/sample_submission.csv")

Let's load our pretrained model.

In [20]:
# Installs keras-vggface straight from the GitHub default branch (no version or commit pin).
# NOTE(review): consider `%pip install` and pinning a commit so a re-run reproduces the same package.
!pip install git+https://github.com/rcmalli/keras-vggface.git

Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-llk1o2si
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... [?25ldone
[?25h  Stored in directory: /tmp/pip-ephem-wheel-cache-fe2cf9ln/wheels/36/07/46/06c25ce8e9cd396dabe151ea1d8a2bc28dafcb11321c1f3a6d
Successfully built keras-vggface
[33mYou are using pip version 19.0.3, however version 19.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [21]:
# `_obtain_input_shape` is not referenced anywhere in the visible notebook.
# NOTE(review): presumably imported as a compatibility check for keras-vggface — confirm before removing.
from keras_applications.imagenet_utils import _obtain_input_shape
from keras_vggface.vggface import VGGFace

# Convolution Features
# VGGFace without its classifier top, with average pooling, built for 160x160 3-channel input.
vgg_features = VGGFace(include_top=False, input_shape=(160, 160, 3), pooling='avg')
# `calc_embs` reads the global name `model`, so bind it to VGGFace for the first embedding pass.
model = vgg_features

Here's preprocessing stuff. The images from the test set seem to already be aligned, so I'll omit that part here for clarity.

In [22]:
def prewhiten(x):
    """Standardize image data to zero mean and (approximately) unit variance.

    A 4-D array is treated as a batch and every item along axis 0 is
    normalized independently; a 3-D array is normalized as a single image.

    Parameters
    ----------
    x : numpy.ndarray
        Array with 3 or 4 dimensions.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``.

    Raises
    ------
    ValueError
        If ``x`` is neither 3-D nor 4-D.
    """
    if x.ndim == 4:
        axis = (1, 2, 3)
        # Element count of one image, taken from the shape rather than from
        # x[0] so that an empty batch does not raise IndexError.
        size = int(np.prod(x.shape[1:]))
    elif x.ndim == 3:
        axis = (0, 1, 2)
        size = x.size
    else:
        raise ValueError('Dimension should be 3 or 4')

    mean = np.mean(x, axis=axis, keepdims=True)
    std = np.std(x, axis=axis, keepdims=True)
    # Floor the standard deviation so constant images do not divide by zero.
    std_adj = np.maximum(std, 1.0 / np.sqrt(size))
    return (x - mean) / std_adj

def l2_normalize(x, axis=-1, epsilon=1e-10):
    """Scale ``x`` to unit Euclidean length along ``axis``.

    The squared norm is clamped from below by ``epsilon`` before the square
    root, so all-zero vectors are returned as zeros instead of NaN.
    """
    squared_norm = np.sum(np.square(x), axis=axis, keepdims=True)
    safe_norm = np.sqrt(np.maximum(squared_norm, epsilon))
    return x / safe_norm

def load_and_align_images(filepaths, margin, image_size=160):
    """Read each image file and resize it to ``image_size`` x ``image_size``.

    Despite the name, no face detection or alignment happens here: every
    image is only resized (with ``mode='reflect'``). ``margin`` is accepted
    for call compatibility but is not used.

    Returns the resized images stacked into one NumPy array, in the order of
    ``filepaths``.
    """
    target_shape = (image_size, image_size)
    return np.array([
        resize(imread(path), target_shape, mode='reflect')
        for path in filepaths
    ])



Here we'll compute all the embeddings for the test images using the pretrained model

In [23]:
def calc_embs(filepaths, margin=10, batch_size=512):
    """Compute L2-normalized embeddings for the images at ``filepaths``.

    Images are loaded, resized and prewhitened in chunks of ``batch_size``
    and passed through the module-level ``model``. Whichever network is
    bound to ``model`` at call time is the one that produces the embeddings.

    Returns one array with a row per input path, in input order.
    """
    batch_outputs = []
    batch_starts = range(0, len(filepaths), batch_size)
    for offset in tqdm(batch_starts):
        batch_paths = filepaths[offset:offset + batch_size]
        images = load_and_align_images(batch_paths, margin)
        batch_outputs.append(model.predict_on_batch(prewhiten(images)))

    return l2_normalize(np.concatenate(batch_outputs))

In [24]:
# Embed every test image with the network currently bound to `model` (VGGFace at this
# point in the notebook). `test_images` fixes the row order of the embedding array.
test_dir = "../input/recognizing-faces-in-the-wild/test/"
test_images = os.listdir(test_dir)
test_paths = [os.path.join(test_dir, name) for name in test_images]
test_embs = calc_embs(test_paths)
np.save("test_embs_vgg.npy", test_embs)

100%|██████████| 13/13 [01:09<00:00,  4.39s/it]


In [26]:
# Shape check: (number of test images, embedding width).
test_embs.shape

(6282, 512)

FaceNet model

In [27]:
# Location of the saved Keras FaceNet model file.
model_path = '../input/facenet-keras/facenet_keras.h5'
# Rebinds the global `model` (previously VGGFace): every later `calc_embs` call embeds with FaceNet.
model = load_model(model_path)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




In [28]:
# NOTE: despite its name, `test_embs_vgg` holds the FaceNet embeddings — `model` was rebound
# to FaceNet before this call, and the array is saved as "test_embs_fnet.npy". The VGGFace
# embeddings are the ones stored in `test_embs`.
test_embs_vgg = calc_embs([os.path.join("../input/recognizing-faces-in-the-wild/test/", f) for f in test_images])
np.save("test_embs_fnet.npy", test_embs_vgg)

100%|██████████| 13/13 [01:10<00:00,  4.43s/it]


In [29]:
# Shape check for the FaceNet embeddings (stored under the misleading name `test_embs_vgg`).
test_embs_vgg.shape

(6282, 128)

In [31]:
# Placeholder score column; it is overwritten once the per-model distances are averaged.
test_df["distance"] = 0
# Map each test image filename to its row in the embedding arrays
# (both arrays follow the order of `test_images`).
img2idx = {name: position for position, name in enumerate(test_images)}

Next, we compute the actual distance between provided image pairs

In [38]:
# Euclidean distance between the two embeddings of every test pair, once per model.
# NOTE: the array names are misleading. `test_embs` holds the VGGFace embeddings
# (-> distance1); `test_embs_vgg` holds the FaceNet embeddings (-> distance2),
# because it was computed after `model` had been rebound to FaceNet.
vgg_distances = []
facenet_distances = []
for pair in tqdm(test_df.img_pair, total=len(test_df)):
    # Resolve the pair's filenames to embedding rows once and reuse them for both models.
    pair_rows = [img2idx[img] for img in pair.split("-")]
    vgg_distances.append(distance.euclidean(*[test_embs[i] for i in pair_rows]))
    facenet_distances.append(distance.euclidean(*[test_embs_vgg[i] for i in pair_rows]))

# Assign each column in one step instead of writing cell by cell with `.loc`.
test_df["distance1"] = vgg_distances
test_df["distance2"] = facenet_distances

100%|██████████| 5310/5310 [00:05<00:00, 906.96it/s]


In [42]:
# Ensemble step: the final score is the plain mean of the two per-model distances;
# this overwrites the existing `distance` column.
test_df['distance'] = test_df[['distance1','distance2']].mean(axis=1)
test_df.head()

Unnamed: 0,img_pair,is_related,distance,distance2,distance1
0,face05508.jpg-face01210.jpg,0,1.284604,1.466604,1.102604
1,face05750.jpg-face00898.jpg,0,1.241447,1.390086,1.092807
2,face05820.jpg-face03938.jpg,0,1.255383,1.460218,1.050548
3,face02104.jpg-face01172.jpg,0,1.099331,1.278298,0.920364
4,face02428.jpg-face05611.jpg,0,1.10086,1.225088,0.976633


Finally, we convert the distances to probability values and submit the result.

In [43]:
# Pull the averaged distances out as a NumPy array and precompute their total,
# which is the denominator used when mapping distances to scores.
all_distances = test_df["distance"].values
sum_dist = all_distances.sum()

In [44]:
# Score for a pair = 1 - (sum of all distances that are <= this pair's distance) / (sum of
# all distances). The smallest distance therefore gets the highest score and the largest
# distance gets 0. This is quadratic in the number of pairs.
probs = [
    1 - all_distances[all_distances <= dist].sum() / sum_dist
    for dist in tqdm(all_distances)
]

100%|██████████| 5310/5310 [00:00<00:00, 30981.31it/s]


In [45]:
# Re-read the sample submission so the output keeps its row order and columns; `probs`
# lines up row for row because `test_df` was loaded from this same file.
submission_template = "../input/recognizing-faces-in-the-wild/sample_submission.csv"
sub_df = pd.read_csv(submission_template)
# Bracket assignment makes it explicit that the existing column is being replaced.
sub_df["is_related"] = probs
sub_df.to_csv("submission.csv", index=False)

In [46]:
# Preview the first rows of the submission frame.
sub_df.head()

Unnamed: 0,img_pair,is_related
0,face05508.jpg-face01210.jpg,0.124012
1,face05750.jpg-face00898.jpg,0.281501
2,face05820.jpg-face03938.jpg,0.221899
3,face02104.jpg-face01172.jpg,0.753986
4,face02428.jpg-face05611.jpg,0.749834
