In [None]:
# Environment setup: install keras_vggface, the package the import cell below
# takes `VGGFace` and `utils` from.
!pip install keras_vggface

In [None]:
# Fetch the project repository. DATA_PATH in the next cell points at
# /content/kinship-relationship-recognition/data/, i.e. it assumes this clone
# is executed from /content.
!git clone https://github.com/plutasnyy/kinship-relationship-recognition.git

In [26]:
import pandas as pd
import glob
import numpy as np
from random import sample
from keras import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras_vggface import VGGFace, utils
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from sklearn.utils import shuffle
# Root of the data directory inside the cloned repository.
# NOTE(review): assumes the repository was cloned under /content.
DATA_PATH = '/content/kinship-relationship-recognition/data/'

# Image directories for the training and the test set.
TRAIN_PATH = DATA_PATH + 'train/'
TEST_PATH = DATA_PATH + 'test/'

# Glob patterns below the training directory:
#   IMAGE_EXPRESSION        - everything inside one entry (fill `{}` with its name),
#   PEOPLE_NAMES_EXPRESSION - every entry two levels below train/.
IMAGE_EXPRESSION = TRAIN_PATH + '{}/*'
PEOPLE_NAMES_EXPRESSION = TRAIN_PATH + '*/*'

# CSV files: the known related pairs and the file read (and later re-exported)
# by the prediction cell.
RELATIONSHIPS_PATH = DATA_PATH + 'train_relationships.csv'
CSV_TEST = DATA_PATH + 'sample_submission.csv'

# Number of pairs drawn for each of the two classes (related / not related).
HALF_DATA_SET_SIZE = 150000
print("START")

def is_pair_in_relation(relations, pair):
    """Tell whether the two people in ``pair`` have to be treated as related.

    Parameters
    ----------
    relations : pandas.DataFrame
        Frame with the columns ``p1`` and ``p2``; every row is one known
        relationship.
    pair : sequence of two items
        The two person identifiers to look up.

    Returns
    -------
    bool
        ``True`` when both identifiers are equal, or when the pair occurs in
        ``relations`` in either order; ``False`` otherwise.
    """
    p1, p2 = pair
    if p1 == p2:
        return True
    # Plain boolean masks instead of a string-built ``query`` expression:
    # identifiers are compared as values, so quotes or other special
    # characters in a name cannot break the lookup, and no expression has to
    # be parsed on every call.
    left, right = relations['p1'], relations['p2']
    forward = (left == p1) & (right == p2)
    backward = (left == p2) & (right == p1)
    return bool((forward | backward).any())


def clean_relations_batch(relations_batch, people_names):
    """Keep only the relationships whose two members are both known people.

    Parameters
    ----------
    relations_batch : pandas.DataFrame
        Frame whose first two columns hold the two members of each
        relationship.
    people_names : iterable
        Identifiers of the people that are actually available. Any iterable
        is accepted; it is consumed exactly once.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``p1`` and ``p2`` and a fresh 0..n-1
        index, containing the surviving rows in their original order.
    """
    # Build the lookup once: hashing makes every membership test O(1) instead
    # of scanning the whole list of names for both members of every row.
    known_people = set(people_names)
    first = relations_batch.iloc[:, 0]
    second = relations_batch.iloc[:, 1]
    keep = first.isin(known_people) & second.isin(known_people)
    return pd.DataFrame({'p1': first[keep].tolist(), 'p2': second[keep].tolist()})

# Load the list of known related pairs.
print('.',end='',flush=True)
relations_df = pd.read_csv(RELATIONSHIPS_PATH)


# Identifiers of the available people: paths matched by the glob, with the
# training-directory prefix stripped.
print('.',end='',flush=True)
names = glob.glob(PEOPLE_NAMES_EXPRESSION)
people_names = [name.replace(TRAIN_PATH, '') for name in names]

# Positive class: drop relationships that mention unknown people, then draw
# HALF_DATA_SET_SIZE rows with replacement.
print('.',end='',flush=True)
relations_df = clean_relations_batch(relations_df, people_names)
relations_batch = relations_df.sample(HALF_DATA_SET_SIZE, replace=True).values.tolist()

# Negative class: rejection-sample random pairs that are not known relatives.
print('.',end='',flush=True)
# Unordered related pairs, built once so that each candidate costs a single
# set lookup instead of a DataFrame query over the whole relations table.
related_pairs = {frozenset(known_pair) for known_pair in relations_df[['p1', 'p2']].values.tolist()}
not_relations_batch = list()

while len(not_relations_batch) < HALF_DATA_SET_SIZE:
    random_pair = sample(people_names, 2)
    # Same acceptance rule as is_pair_in_relation: reject identical
    # identifiers and pairs that are related in either order.
    if random_pair[0] != random_pair[1] and frozenset(random_pair) not in related_pairs:
        not_relations_batch.append(random_pair)
print("Done")

START
....Construct batch
100


In [None]:
from keras.preprocessing.image import ImageDataGenerator

# VGGFace ResNet-50 without its top layers and with average pooling; in this
# notebook it is only used through `model.predict` to turn an image into a
# feature vector.
model = VGGFace(
    model='resnet50',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)

# Random augmentation settings; `prepare_image` draws from them through
# `datagen.random_transform`.
# NOTE(review): `rescale` is presumably not applied by `random_transform`
# (Keras applies it in `standardize`/`flow`) - confirm whether it has any
# effect here.
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

class EmptyFolderSadSituation(Exception):
    """Raised when the image glob for a person matches no files.

    Derives from ``Exception`` rather than ``BaseException``: the latter is
    not meant to be subclassed by user code, and exceptions outside the
    ``Exception`` hierarchy slip past ordinary ``except Exception`` handlers
    the same way ``KeyboardInterrupt`` and ``SystemExit`` do.
    """

def prepare_image(path, transform = False):
    """Load the image at ``path`` and return the VGGFace model output for it.

    Parameters
    ----------
    path : str
        Image file handed to ``image.load_img``.
    transform : bool, optional
        When true, one random augmentation drawn from the module-level
        ``datagen`` is applied before preprocessing. Defaults to ``False``.

    Returns
    -------
    The result of ``model.predict`` on a batch holding this single image.
    """
    # `target_size` is (height, width); the channel count is not part of it.
    img = image.load_img(path, target_size=(224, 224))
    x = image.img_to_array(img)
    if transform:
        x = datagen.random_transform(x)
    # The model consumes batches, so add a leading batch axis of length 1.
    x = np.expand_dims(x, axis=0)
    # NOTE(review): version=2 is presumably the keras_vggface preprocessing
    # variant meant for the ResNet-50 weights - confirm against its docs.
    x = utils.preprocess_input(x, version=2)
    return model.predict(x)

def preprocess_image(person_directory_path):
    """Return the model output for one randomly chosen, augmented image.

    ``person_directory_path`` is used as a glob pattern. One of the matching
    paths is picked at random and passed to ``prepare_image`` with random
    augmentation switched on.

    Raises
    ------
    EmptyFolderSadSituation
        If the pattern matches nothing.
    """
    candidates = glob.glob(person_directory_path)
    if not candidates:
        raise EmptyFolderSadSituation
    (chosen_path,) = sample(candidates, 1)
    return prepare_image(chosen_path, True)

  

def construct_batch(relations_list):
    """Turn groups of name pairs into model-output pairs and labels.

    Parameters
    ----------
    relations_list : sequence of iterables
        One iterable of ``(left_name, right_name)`` pairs per class. The
        position of a group in ``relations_list`` becomes the label of all
        its pairs.

    Returns
    -------
    tuple
        ``(pairs, Y)``: ``pairs`` holds one ``[left, right]`` list of
        ``preprocess_image`` results per usable pair, ``Y`` the matching
        labels. Pairs for which either person has no images are left out.
    """
    pairs, Y = [], []
    processed = 0
    labelled_names = (
        (label, names)
        for label, group in enumerate(relations_list)
        for names in group
    )
    for label, (left_image_name, right_image_name) in labelled_names:
        try:
            embeddings = [
                preprocess_image(IMAGE_EXPRESSION.format(left_image_name)),
                preprocess_image(IMAGE_EXPRESSION.format(right_image_name)),
            ]
        except EmptyFolderSadSituation:
            # No image available for one of the two people: skip the pair.
            pass
        else:
            pairs.append(embeddings)
            Y.append(label)
        # Progress counts every pair looked at, skipped ones included.
        processed += 1
        if processed % 2500 == 0:
            print(processed)
    return pairs, Y
  
print("Construct batch")
# The position in the list is the label: 0 = unrelated pairs, 1 = related pairs.
pairs, Y = construct_batch(
    relations_list=[
        not_relations_batch,
        relations_batch,
    ]
)
print("Done")


In [27]:
# Stack the collected pairs and labels into arrays.
X = np.array(pairs)
y = np.array(Y)

# First and second member of every pair.
left, right = X[:, 0], X[:, 1]
print('.',end='',flush=True)
print('.',end='',flush=True)
# Join both members along axis 2, then drop the length-1 axis 1 so that every
# pair becomes one flat feature row.
X = np.squeeze(np.concatenate([left, right], axis=2), axis=1)
print(X.shape)
print('Data loaded')

..(100, 4096)
Data loaded


In [28]:

from sklearn.utils import shuffle
from keras.layers import Dropout

from keras.optimizers import SGD

# Shuffle features and labels in unison before training.
# NOTE(review): presumably needed because `validation_split` takes its
# samples from the end of the data - confirm against the Keras docs.
X, y = shuffle(X, y)

# Fully connected binary classifier on top of the concatenated pair features:
# three ReLU layers, each followed by dropout, then a single sigmoid unit.
model2 = Sequential([
    Dense(X.shape[1], activation='relu'),
    Dropout(0.4),
    Dense(1024, activation='relu'),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

adam = Adam(lr=1e-6)
model2.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
model2.fit(X, y, validation_split=0.3, epochs=30)

Train on 80 samples, validate on 20 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f2fa99b6470>

In [37]:
def create_vector_from_images(img1_path, img2_path):
    """Build the classifier input for one pair of image files.

    Both images go through ``prepare_image`` without augmentation; the two
    results are concatenated along axis 1, first image first.
    """
    embeddings = [prepare_image(image_path) for image_path in (img1_path, img2_path)]
    return np.concatenate(embeddings, axis=1)
    
# Read the test pairs, predict each one and export the result.
df = pd.read_csv(CSV_TEST)
print(df.head())
for index, row in df.iterrows():
    # `img_pair` has the form "<left file>-<right file>".
    left_img, right_img = row['img_pair'].split('-')
    # Use a dedicated name so the training matrix `X` is not overwritten.
    pair_features = create_vector_from_images(TEST_PATH+left_img, TEST_PATH+right_img)

    result = model2.predict(pair_features)
    # Threshold the sigmoid output and always store the decision, so the
    # exported column never depends on the values prefilled in the input CSV.
    is_related = int(result[0][0] >= 0.5)
    # Progress marker: '-' for a predicted relation, '.' otherwise.
    print('-' if is_related else '.', end='', flush=True)
    df.loc[index, 'is_related'] = is_related
df.to_csv('results.csv', index=False)
print(df.head())

                      img_pair  is_related
0  face05508.jpg-face01210.jpg           0
1  face05750.jpg-face00898.jpg           0
2  face05820.jpg-face03938.jpg           0
3  face02104.jpg-face01172.jpg           0
4  face02428.jpg-face05611.jpg           0
[[2.927374   0.         0.03122351 ... 1.8139669  0.0205928  0.        ]]
[[0.3652932]]
face01210.jpg
                      img_pair  is_related
0  face05508.jpg-face01210.jpg           0
1  face05750.jpg-face00898.jpg           0
2  face05820.jpg-face03938.jpg           0
3  face02104.jpg-face01172.jpg           0
4  face02428.jpg-face05611.jpg           0
