In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob

In [2]:
relationships = pd.read_csv('train_relationships.csv')
relationships.head()

Unnamed: 0,p1,p2
0,F0002/MID1,F0002/MID3
1,F0002/MID2,F0002/MID3
2,F0005/MID1,F0005/MID2
3,F0005/MID3,F0005/MID2
4,F0009/MID1,F0009/MID4


In [3]:
plt.imread('train/F0002/MID1/P00012_face2.jpg').shape

(224, 224, 3)

In [4]:
from keras.models import Model
from keras.layers import Convolution2D, Flatten
from keras.layers import Dense, Input, Dropout, concatenate

Using TensorFlow backend.


In [47]:
inp1 = Input(shape = (224,224,3))
x = Convolution2D(filters=32, kernel_size=(3, 3), strides=(2, 2), activation='relu')(inp1)
x = Convolution2D(filters=64, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=128, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=256, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=512, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Flatten()(x)
x = Dropout(rate=0.5)(x)
x1 = Dense(64,activation='relu')(x)

inp2 = Input(shape = (224,224,3))
x = Convolution2D(filters=32, kernel_size=(3, 3), strides=(2, 2), activation='relu')(inp2)
x = Convolution2D(filters=64, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=128, kernel_size=(3, 3), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=256, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Convolution2D(filters=512, kernel_size=(5, 5), strides=(2, 2), activation='relu')(x)
x = Flatten()(x)
x = Dropout(rate=0.5)(x)
x2 = Dense(64,activation='relu')(x)

x = concatenate([x1,x2])
output = (Dense(1,activation='sigmoid'))(x)

model = Model(inputs=[inp1,inp2],outputs=output)
model.compile(optimizer='adam',loss='binary_crossentropy')
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv2d_21 (Conv2D)              (None, 111, 111, 32) 896         input_5[0][0]                    
__________________________________________________________________________________________________
conv2d_26 (Conv2D)              (None, 111, 111, 32) 896         input_6[0][0]                    
__________________________________________________________________________________________________
conv2d_22 

In [48]:
def prepare_pair(str1,str2):
    # return the images provided current string path as arrays
    a = plt.imread(str1)
    b = plt.imread(str2)
    return (a,b)

def make_pairs(str1,str2):
    # prepare a list of all image pairs for two folders
    l1 = glob.glob('train/'+str1+'/*')
    l2 = glob.glob('train/'+str2+'/*')
    if (len(l1)>len(l2)):
        l1,l2 = l2,l1
    output = []
    for i in range(len(l1)):
        for j in range(i,len(l2)):
            output.append((l1[i],l2[j]))
    return output

def generate_batch(i,j=None):
    # make a batch ready for training
    # i for a single entry from the pairs dataframe
    # i,j for two individuals that are not related
    if (j is None):
        related = True
        pairs = make_pairs(*list(relationships.iloc[i]))
    else:
        related = False
        pairs = make_pairs(i,j)
    size = len(pairs)
    X1 = np.zeros((size,224,224,3))
    X2 = np.zeros((size,224,224,3))
    if (related):
        y = np.ones((size),dtype=int)
    else:
        y = np.zeros((size),dtype=int)
    i = 0
    for pair in pairs:
        X1[i],X2[i] = prepare_pair(*pair)
        i+=1
    X1/=255.
    X2/=255.
    return X1, X2, y

In [51]:
# training

batch_size = 100
iterations = 250
thresh = 1000

for iteration in range(iterations):
    print('Iteration:',iteration)
    related = 0
    unrelated = 0
    data_r = []
    data_u = []
    # fetch data points
    while (related<batch_size):
        # get datapoints for related
        i = np.random.randint(len(relationships))
        X1, X2, y = generate_batch(i)
        if (len(y)<thresh):
            related+=len(y)
            data_r.append((X1,X2,y))
    while (unrelated<batch_size):
        # get datapoints for unrelated
        i = np.random.randint(len(relationships))
        j = i
        while (j==i):
            j = np.random.randint(len(relationships))
        a = relationships.iloc[i][np.random.randint(2)]
        b = relationships.iloc[j][np.random.randint(2)]
        X1, X2, y = generate_batch(a,b)
        if (len(y)<thresh):
            unrelated+=len(y)
            data_u.append((X1,X2,y))        
    # merge into a balanced set and shuffle
    # join all related points
    X1_r = np.concatenate([r[0] for r in data_r])
    X2_r = np.concatenate([r[1] for r in data_r])
    y_r = np.concatenate([r[2] for r in data_r])
    # randomly choose batch
    choice = np.random.choice(range(len(y_r)),size=batch_size,replace=False)
    X1_r = X1_r[choice]
    X2_r = X2_r[choice]
    y_r = y_r[choice]
    # same for unrelated
    X1_u = np.concatenate([u[0] for u in data_u])
    X2_u = np.concatenate([u[1] for u in data_u])
    y_u = np.concatenate([u[2] for u in data_u])
    # randomly choose batch
    choice = np.random.choice(range(len(y_u)),size=batch_size,replace=False)
    X1_u = X1_u[choice]
    X2_u = X2_u[choice]
    y_u = y_u[choice]
    X1 = np.concatenate([X1_r,X1_u])
    X2 = np.concatenate([X2_r,X2_u])
    y = np.concatenate([y_r,y_u])
    # shuffle batch
    shuffle = np.arange(len(y))
    np.random.shuffle(shuffle)
    X1 = X1[shuffle]
    X2 = X2[shuffle]
    y = y[shuffle]
    # train
    model.train_on_batch([X1,X2],y)

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
It

In [53]:
# evaluation
for epoch in range(10):
    print('Epoch:',epoch)
    # one batch on related
    i = np.random.randint(len(relationships))
    X1, X2, y = generate_batch(i)
    if (len(y)>0):
        p = model.predict([X1,X2])
        p[p>0.5]=1
        p[p<=0.5]=0
        print('\trelated',len(X1),(y==p).mean())
    # one batch on unrelated
    i = np.random.randint(len(relationships))
    j = i
    while (j==i):
        j = np.random.randint(len(relationships))
    a = relationships.iloc[i][np.random.randint(2)]
    b = relationships.iloc[j][np.random.randint(2)]
    X1, X2, y = generate_batch(a,b)
    if (len(y)>0):
        p = model.predict([X1,X2])
        p[p>0.5]=1
        p[p<=0.5]=0
        print('\tunrelated',len(X1),(y==p).mean())

Epoch: 0
	related 2 0.0
	unrelated 27 1.0
Epoch: 1
	related 27 0.0
	unrelated 50 1.0
Epoch: 2
	related 35 0.0
	unrelated 3 1.0
Epoch: 3
	related 13 0.0
	unrelated 1 1.0
Epoch: 4
	related 4 0.0
	unrelated 26 1.0
Epoch: 5
	related 25 0.0
	unrelated 85 1.0
Epoch: 6
	related 35 0.0
	unrelated 3 1.0
Epoch: 7
	related 189 0.0
	unrelated 30 1.0
Epoch: 8
	related 38 0.0
	unrelated 105 1.0
Epoch: 9
	related 45 0.0
	unrelated 18 1.0


In [54]:
# training

batch_size = 250
iterations = 1
thresh = 1000

for iteration in range(iterations):
    print('Iteration:',iteration)
    related = 0
    unrelated = 0
    data_r = []
    data_u = []
    # fetch data points
    while (related<batch_size):
        # get datapoints for related
        i = np.random.randint(len(relationships))
        X1, X2, y = generate_batch(i)
        if (len(y)<thresh):
            related+=len(y)
            data_r.append((X1,X2,y))
    while (unrelated<batch_size):
        # get datapoints for unrelated
        i = np.random.randint(len(relationships))
        j = i
        while (j==i):
            j = np.random.randint(len(relationships))
        a = relationships.iloc[i][np.random.randint(2)]
        b = relationships.iloc[j][np.random.randint(2)]
        X1, X2, y = generate_batch(a,b)
        if (len(y)<thresh):
            unrelated+=len(y)
            data_u.append((X1,X2,y))        
    # merge into a balanced set and shuffle
    # join all related points
    X1_r = np.concatenate([r[0] for r in data_r])
    X2_r = np.concatenate([r[1] for r in data_r])
    y_r = np.concatenate([r[2] for r in data_r])
    # randomly choose batch
    choice = np.random.choice(range(len(y_r)),size=batch_size,replace=False)
    X1_r = X1_r[choice]
    X2_r = X2_r[choice]
    y_r = y_r[choice]
    # same for unrelated
    X1_u = np.concatenate([u[0] for u in data_u])
    X2_u = np.concatenate([u[1] for u in data_u])
    y_u = np.concatenate([u[2] for u in data_u])
    # randomly choose batch
    choice = np.random.choice(range(len(y_u)),size=batch_size,replace=False)
    X1_u = X1_u[choice]
    X2_u = X2_u[choice]
    y_u = y_u[choice]
    X1 = np.concatenate([X1_r,X1_u])
    X2 = np.concatenate([X2_r,X2_u])
    y = np.concatenate([y_r,y_u])
    # shuffle batch
    shuffle = np.arange(len(y))
    np.random.shuffle(shuffle)
    X1 = X1[shuffle]
    X2 = X2[shuffle]
    y = y[shuffle]
    # train
    model.train_on_batch([X1,X2],y)

Iteration: 0
