In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

In [2]:
relationships = pd.read_csv('train_relationships.csv')
relationships.head()

Unnamed: 0,p1,p2
0,F0002/MID1,F0002/MID3
1,F0002/MID2,F0002/MID3
2,F0005/MID1,F0005/MID2
3,F0005/MID3,F0005/MID2
4,F0009/MID1,F0009/MID4


In [3]:
plt.imread('train/F0002/MID1/P00012_face2.jpg').shape

(224, 224, 3)

In [4]:
from keras.models import Model
from keras.layers import Convolution2D, Flatten
from keras.layers import Dense, Input, Dropout, concatenate

Using TensorFlow backend.


In [13]:
inp1 = Input(shape = (224,224,3))
x = Convolution2D(filters=32, kernel_size=(3, 3), strides=(2, 2), activation='relu')(inp1)
x = Convolution2D(filters=64, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=128, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=256, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=512, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Flatten()(x)
x = Dropout(rate=0.5)(x)
x1 = Dense(64,activation='relu')(x)

inp2 = Input(shape = (224,224,3))
x = Convolution2D(filters=32, kernel_size=(3, 3), strides=(2, 2), activation='relu')(inp2)
x = Convolution2D(filters=64, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=128, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=256, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=512, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Flatten()(x)
x = Dropout(rate=0.5)(x)
x2 = Dense(64,activation='relu')(x)

x = concatenate([x1,x2])
output = (Dense(1,activation='sigmoid'))(x)

model = Model(inputs=[inp1,inp2],outputs=output)
model.compile(optimizer='adam',loss='binary_crossentropy')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv2d_11 (Conv2D)              (None, 111, 111, 32) 896         input_3[0][0]                    
__________________________________________________________________________________________________
conv2d_16 (Conv2D)              (None, 111, 111, 32) 896         input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_12 

In [6]:
def prepare_pair(str1,str2):
    # return the images provided current string path as arrays
    a = plt.imread(str1)
    b = plt.imread(str2)
    return (a,b)

def make_pairs(str1,str2):
    # prepare a list of all image pairs for two folders
    l1 = glob.glob('train/'+str1+'/*')
    l2 = glob.glob('train/'+str2+'/*')
    if (len(l1)>len(l2)):
        l1,l2 = l2,l1
    output = []
    for i in range(len(l1)):
        for j in range(i,len(l2)):
            output.append((l1[i],l2[j]))
    return output

def generate_batch(i,j=None):
    # make a batch ready for training
    # i for a single entry from the pairs dataframe
    # i,j for two individuals that are not related
    if (j is None):
        related = True
        pairs = make_pairs(*list(relationships.iloc[i]))
    else:
        related = False
        pairs = make_pairs(i,j)
    size = len(pairs)
    X1 = np.zeros((size,224,224,3))
    X2 = np.zeros((size,224,224,3))
    if (related):
        y = np.ones((size),dtype=int)
    else:
        y = np.zeros((size),dtype=int)
    i = 0
    for pair in pairs:
        X1[i],X2[i] = prepare_pair(*pair)
        i+=1
    X1/=255.
    X2/=255.
    return X1, X2, y

In [7]:
# related
a,b,c = generate_batch(222)
a.shape,b.shape,c.shape

((7, 224, 224, 3), (7, 224, 224, 3), (7,))

In [8]:
# not related
a,b,c = generate_batch(relationships.iloc[3][0],relationships.iloc[123][1])
a.shape,b.shape,c.shape

((18, 224, 224, 3), (18, 224, 224, 3), (18,))

In [9]:
import datetime
datetime.datetime.now()

datetime.datetime(2019, 6, 11, 10, 20, 24, 483764)

In [10]:
datetime.datetime.now().hour

10

In [11]:
# training
total_related = 0
total_unrelated = 0

iteration = 0

for iteration in range(300):
    if (iteration%100==0):
        print('Epoch:',iteration)
    if (total_related-total_unrelated<=0):
        # one batch on related unless there is imbalance
        i = np.random.randint(len(relationships))
        X1, X2, y = generate_batch(i)
        total_related+=len(y)
        #print('\trelated',len(X1))
        #model.train_on_batch([X1,X2],y)
    if (total_related-total_unrelated>=0):
        # one batch on unrelated unless there is imbalance
        i = np.random.randint(len(relationships))
        j = i
        while (j==i):
            j = np.random.randint(len(relationships))
        a = relationships.iloc[i][np.random.randint(2)]
        b = relationships.iloc[j][np.random.randint(2)]
        aX1, aX2, ay = generate_batch(a,b)
        total_unrelated+=len(y)
        #print('\tunrelated',len(X2))
        #model.train_on_batch([X1,X2],y)
    
    model.train_on_batch([])

Epoch: 0
Instructions for updating:
Use tf.cast instead.
Epoch: 100
Epoch: 200


In [12]:
# evaluation
for epoch in range(10):
    print('Epoch:',epoch)
    # one batch on related
    i = np.random.randint(len(relationships))
    X1, X2, y = generate_batch(i)
    if (len(y)>0):
        p = model.predict([X1,X2])
        p[p>0.5]=1
        p[p<=0.5]=0
        print('\trelated',len(X1),(y==p).mean())
    # one batch on unrelated
    i = np.random.randint(len(relationships))
    j = i
    while (j==i):
        j = np.random.randint(len(relationships))
    a = relationships.iloc[i][np.random.randint(2)]
    b = relationships.iloc[j][np.random.randint(2)]
    X1, X2, y = generate_batch(a,b)
    if (len(y)>0):
        p = model.predict([X1,X2])
        p[p>0.5]=1
        p[p<=0.5]=0
        print('\tunrelated',len(X1),(y==p).mean())

Epoch: 0
	related 36 1.0
	unrelated 3 0.0
Epoch: 1
	unrelated 1 0.0
Epoch: 2
	related 2 1.0
	unrelated 18 0.0
Epoch: 3
	related 41 1.0
	unrelated 42 0.0
Epoch: 4
	related 14 1.0
	unrelated 5 0.0
Epoch: 5
	related 22 1.0
	unrelated 42 0.0
Epoch: 6
	related 21 1.0
Epoch: 7
	related 27 1.0
	unrelated 20 0.0
Epoch: 8
	related 6 1.0
	unrelated 21 0.0
Epoch: 9
	related 5 1.0
	unrelated 15 0.0
