In [2]:
import glob
import random
from random import sample

import numpy as np
import pandas as pd
from keras import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.preprocessing import image
from keras_vggface import VGGFace, utils
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# --- Configuration ------------------------------------------------------------
# Everything below is derived from DATA_PATH by plain string concatenation, so
# DATA_PATH must keep its trailing slash.
DATA_PATH = '../data/'

RELATIONSHIPS_PATH = DATA_PATH + 'train_relationships.csv'
# Glob pattern for the files of one person; '{}' is filled in with str.format.
IMAGE_EXPRESSION = DATA_PATH + 'train/{}/*'
# Glob pattern matching every entry two levels below the train directory.
PEOPLE_NAMES_EXPRESSION = DATA_PATH + 'train/*/*'
TRAIN_PATH = DATA_PATH + 'train/'

# Number of pairs requested per class (related / not related).
HALF_DATA_SET_SIZE = 200

# Seed the global generators used by random.sample, DataFrame.sample and
# sklearn.utils.shuffle so that pair selection can be repeated.
# NOTE(review): glob ordering depends on the file system, so repeatability is
# only expected on the same machine and data layout.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

print("START")
# Feature extractor: no classification head, global average pooling on top.
model = VGGFace(model='resnet50',  include_top=False, input_shape=(224, 224, 3), pooling='avg')

class EmptyFolderSadSituation(Exception):
    """Raised when a person's image glob pattern matches no files.

    Derives from ``Exception`` (not ``BaseException``) so that it behaves like
    an ordinary application error and is visible to ``except Exception``
    handlers.
    """

def preprocess_image(person_directory_path):
    """Run the feature extractor on one randomly chosen image.

    Parameters
    ----------
    person_directory_path : str
        Glob pattern handed to ``glob.glob``; one of the matching paths is
        picked at random.

    Returns
    -------
    The result of ``model.predict`` for a batch that contains only the
    selected image.

    Raises
    ------
    EmptyFolderSadSituation
        If the pattern matches no paths.
    """
    paths = glob.glob(person_directory_path)
    if not paths:
        raise EmptyFolderSadSituation
    image_path = sample(paths, 1)[0]
    # target_size is (height, width) only; the channel count does not belong here.
    img = image.load_img(image_path, target_size=(224, 224))
    x = image.img_to_array(img)
    # Add a leading batch axis: the model is fed one image at a time.
    x = np.expand_dims(x, axis=0)
    # keras_vggface pairs version=1 with VGG16 and version=2 with
    # resnet50/senet50; the module-level model is built with model='resnet50'.
    x = utils.preprocess_input(x, version=2)
    return model.predict(x)

def is_pair_in_relation(relations, pair):
    """Tell whether two people must be treated as related.

    Parameters
    ----------
    relations : pandas.DataFrame
        Frame with columns ``p1`` and ``p2``, one row per known relationship.
    pair : sequence of two names
        The candidate pair.

    Returns
    -------
    bool
        ``True`` if both names are equal, or if some row lists the pair in
        either order; ``False`` otherwise.
    """
    p1, p2 = pair
    if p1 == p2:
        return True
    # Compare with boolean masks instead of a formatted query string: names
    # containing quotes cannot break the expression, and nothing is re-parsed
    # on every call.
    first, second = relations['p1'], relations['p2']
    same_order = (first == p1) & (second == p2)
    swapped = (first == p2) & (second == p1)
    return bool((same_order | swapped).any())


def clean_relations_batch(relations_batch, people_names):
    """Keep only the relationships whose two members are both known people.

    Parameters
    ----------
    relations_batch : pandas.DataFrame
        Relationship table; the first two columns hold the two people.
    people_names : iterable of str
        Names that are allowed to appear in the result.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``p1`` and ``p2`` and a fresh 0..n-1 index,
        containing the surviving rows in their original order.
    """
    # A set gives constant-time membership tests; the input is typically a list.
    known_people = set(people_names)
    first = relations_batch.iloc[:, 0]
    second = relations_batch.iloc[:, 1]
    keep = first.isin(known_people) & second.isin(known_people)
    # Rebuild from plain lists so the result never inherits the input's index
    # or column names.
    return pd.DataFrame({'p1': first[keep].tolist(), 'p2': second[keep].tolist()})


def construct_batch(relations_list):
    """Turn groups of name pairs into feature vectors and integer labels.

    Parameters
    ----------
    relations_list : sequence of sequences of (left, right) name pairs
        The position of each group in this sequence is used as the label for
        all of its pairs.

    Returns
    -------
    (list, list)
        The flattened, concatenated outputs of ``preprocess_image`` for each
        usable pair, and the matching labels. Pairs for which
        ``preprocess_image`` raises ``EmptyFolderSadSituation`` are skipped.
    """
    features = []
    labels = []
    attempted = 0
    for label, group in enumerate(relations_list):
        for left_name, right_name in group:
            try:
                left_vec = preprocess_image(IMAGE_EXPRESSION.format(left_name))
                right_vec = preprocess_image(IMAGE_EXPRESSION.format(right_name))
            except EmptyFolderSadSituation:
                # One of the two folders has no images: drop the pair.
                pass
            else:
                # axis=None flattens both inputs before joining them.
                features.append(np.concatenate((left_vec, right_vec), axis=None))
                labels.append(label)
            # Progress counter covers skipped pairs too.
            attempted += 1
            if attempted % 100 == 0:
                print(attempted)
    return features, labels


# Paths matched by the people pattern, with the train-directory prefix removed.
names = glob.glob(PEOPLE_NAMES_EXPRESSION)
people_names = [name.replace(TRAIN_PATH, '') for name in names]

# Relationship table restricted to people that were found on disk.
relations_df = clean_relations_batch(pd.read_csv(RELATIONSHIPS_PATH), people_names)

# Positive class: rows drawn with replacement from the cleaned table.
relations_batch = (
    relations_df
    .sample(HALF_DATA_SET_SIZE, replace=True)
    .values
    .tolist()
)

# Negative class: random pairs of people that are not listed as related.
not_relations_batch = []
while len(not_relations_batch) < HALF_DATA_SET_SIZE:
    random_pair = sample(people_names, 2)
    if is_pair_in_relation(relations_df, random_pair):
        continue
    not_relations_batch.append(random_pair)

print("Construct batch")
# Group order fixes the labels: index 0 = not related, index 1 = related.
pairs, Y = construct_batch([not_relations_batch, relations_batch])

X, y = np.array(pairs), np.array(Y)

print("Done")


Using TensorFlow backend.


START
Construct batch
100
200
300
400
Done


In [5]:

from sklearn.utils import shuffle

# Shuffle samples and labels in unison; X was assembled one class after the
# other. NOTE(review): presumably needed because validation_split takes a
# contiguous slice of the data - confirm against the Keras docs.
X, y = shuffle(X, y)

# Fully connected binary classifier on top of the concatenated pair features.
model2 = Sequential([
    Dense(X.shape[1], activation='relu'),
    Dense(1024, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

adam = Adam(lr=0.01)
model2.compile(
    optimizer=adam,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model2.fit(X, y, validation_split=0.2, epochs=10)

Train on 320 samples, validate on 80 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc3a817b358>