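1. Load the pre-trained DeepFace weights.
2. Replace the original classification layer with a new 2-class (patient vs. control) layer.
3. For each syndrome in `syn_list`, run leave-one-out cross-validation and calculate the statistics (AROC, specificity, sensitivity).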


In [1]:
import keras
import numpy as np
from os import path
from os import listdir
from os.path import isfile, join
from PIL import Image 
from keras.models import Model
import tensorflow as tf
import csv
import pandas as pd
from tqdm import tqdm
import keras.initializers

from sklearn.model_selection import LeaveOneOut
import cv2
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.preprocessing import Normalizer

Using TensorFlow backend.


In [2]:
import time

In [3]:
# Record the library versions in use: TensorFlow first (2.0.0), then Keras (2.2.5).
print(tf.__version__, keras.__version__, sep="\n")

2.0.0
2.2.5


In [4]:
# Spatial input size; dictated by the DeepFace model.
IMAGE_SIZE = (152, 152)
# RGB images have three colour planes.
CHANNELS = 3
# Width of the pretrained softmax layer; that classification layer is removed again later.
NUM_CLASSES = 8631
# SGD hyper-parameters.
LEARN_RATE = 1e-2
MOMENTUM = 0.9

# Archive with the pretrained weights and the MD5 checksum used to verify the download.
DOWNLOAD_PATH = 'https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip'
MD5_HASH = '0b21fb70cd6901c96c19ac14c9ea8b89'

In [5]:
# Initializers passed to the layers built in the model functions below:
# kernels are drawn from a normal distribution (mean 0, stddev 0.01) and
# biases start at a constant 0.5.
wt_init = tf.random_normal_initializer(mean=0, stddev=0.01)
bias_init = tf.constant_initializer(value=0.5)

In [6]:
def create_classifying_deepface(image_size=IMAGE_SIZE, channels=CHANNELS, num_classes=NUM_CLASSES, learn_rate=LEARN_RATE, momentum=MOMENTUM):
    """Build the DeepFace classifier, compile it and load the pretrained weights.

    Args:
        image_size: (height, width) of the input images.
        channels: number of colour channels of the input images.
        num_classes: number of units in the final softmax layer ('F10').
        learn_rate: learning rate of the SGD optimizer.
        momentum: momentum of the SGD optimizer.

    Returns:
        A compiled `tf.keras.Sequential` model whose weights have been
        loaded from the file returned by `get_weights()`.
    """
    L = tf.keras.layers
    # Keyword arguments common to every ReLU-activated trainable layer.
    relu_kwargs = dict(activation=tf.nn.relu, kernel_initializer=wt_init, bias_initializer=bias_init)

    net = tf.keras.Sequential()
    add = net.add

    # Convolutional front end.
    add(L.Input(shape=(*image_size, channels), name='I0'))
    add(L.Conv2D(filters=32, kernel_size=11, name='C1', **relu_kwargs))
    add(L.MaxPool2D(pool_size=(3,3), strides=2, padding='same', name='M2'))
    add(L.Conv2D(filters=16, kernel_size=9, name='C3', **relu_kwargs))

    # Locally connected layers (unshared filters per location).
    add(L.LocallyConnected2D(filters=16, kernel_size=9, name='L4', **relu_kwargs))
    add(L.LocallyConnected2D(filters=16, kernel_size=7, strides=2, name='L5', **relu_kwargs))
    add(L.LocallyConnected2D(filters=16, kernel_size=5, name='L6', **relu_kwargs))

    # Fully connected head ending in the softmax classification layer.
    add(L.Flatten(name='F7'))
    add(L.Dense(units=4096, name='F8', **relu_kwargs))
    add(L.Dropout(rate=0.5, name='D9'))
    add(L.Dense(units=num_classes, activation=tf.nn.softmax, kernel_initializer=wt_init, bias_initializer=bias_init, name='F10'))

    net.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=learn_rate, momentum=momentum),
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['acc'],
    )

    # Overwrite the random initialisation with the pretrained weights.
    net.load_weights(get_weights())

    return net

In [7]:
def get_weights():
    """Download (or reuse the cached) pretrained weights and return the .h5 path.

    The archive at DOWNLOAD_PATH is fetched with `keras.utils.get_file`,
    verified against MD5_HASH and extracted next to the downloaded file.

    Returns:
        Path of the weight file: the directory of the downloaded archive
        joined with the archive's base name minus its '.zip' extension.
    """
    filename = 'deepface.zip'
    downloaded_file_path = keras.utils.get_file(filename, DOWNLOAD_PATH,
        md5_hash=MD5_HASH, extract=True)

    archive_name = path.basename(DOWNLOAD_PATH)
    # Remove the '.zip' *suffix*. str.rstrip('.zip') would instead strip any
    # trailing run of the characters '.', 'z', 'i', 'p', which can eat into
    # the real file name (e.g. 'zip.zip' -> '').
    h5_name, extension = path.splitext(archive_name)
    if extension.lower() != '.zip':
        h5_name = archive_name

    return path.join(path.dirname(downloaded_file_path), h5_name)


def create_deepface():
    """Build a 2-class DeepFace model on top of the pretrained network.

    The pretrained classifier is created, its final softmax layer is
    discarded and a new 2-unit softmax layer named 'preds' is attached to
    the output of the second-to-last layer.

    Returns:
        A `tf.keras.Model` compiled with SGD (LEARN_RATE, MOMENTUM),
        sparse categorical cross-entropy loss and the 'acc' metric.
    """
    # create_classifying_deepface() already loads the pretrained weights, so
    # they are not loaded a second time here (that would repeat the checksum
    # and file read for every model that is built).
    model = create_classifying_deepface()

    num_classes = 2
    # Output of the second-to-last layer, i.e. everything except the
    # original classification layer.
    x = model.layers[-2].output
    x = tf.keras.layers.Dense(units=num_classes, activation=tf.nn.softmax,
                              kernel_initializer=wt_init, bias_initializer=bias_init, name='preds')(x)
    model2 = tf.keras.Model(inputs=model.input, outputs=x)

    sgd_opt = tf.keras.optimizers.SGD(learning_rate=LEARN_RATE, momentum=MOMENTUM)
    # Sparse variant: labels are passed as integer class ids, not one-hot vectors.
    cce_loss = tf.keras.losses.SparseCategoricalCrossentropy()
    model2.compile(optimizer=sgd_opt, loss=cce_loss, metrics=['acc'])

    return model2

In [8]:
def _list_jpgs(directory):
    """Return the sorted names of the regular files in `directory` containing '.jpg'."""
    return sorted(f for f in listdir(directory)
                  if isfile(join(directory, f)) and ".jpg" in f)


def _read_image(image_path):
    """Read one image as a float32 RGB array resized to IMAGE_SIZE."""
    # The context manager closes the underlying file handle once the pixels
    # have been copied into the array.
    with Image.open(image_path) as im:
        # Force three channels so grayscale / RGBA files do not yield arrays
        # of a different shape than the rest of the batch.
        im = im.convert("RGB").resize(IMAGE_SIZE)
        return np.array(im, dtype=np.float32)


def load_data(syn, GENERAL_DIR):
    """Load the patient and control images of one syndrome.

    Images are read from `<GENERAL_DIR>/<syn>/<syn>-patients` (label 1) and
    `<GENERAL_DIR>/<syn>/<syn>-selected-ID-controls` (label 0).

    Args:
        syn: syndrome name, used for the directory names.
        GENERAL_DIR: root directory containing one sub-directory per syndrome.

    Returns:
        Tuple `(data, labels)`: `data` is a float32 array of shape
        (n_images, height, width, 3) with all patients first and then all
        controls; `labels` is the matching array of 1/0 labels.

    Pixel values are left in the 0-255 range.
    NOTE(review): confirm this matches the preprocessing the pretrained
    weights expect.
    """
    # os.path.join instead of hard-coded backslashes: same path on Windows,
    # and it avoids the invalid '\{' escape sequence.
    syn_dir = join(GENERAL_DIR, syn, "{}-patients".format(syn))
    ID_dir = join(GENERAL_DIR, syn, "{}-selected-ID-controls".format(syn))

    # get list of filenames
    files_syn = _list_jpgs(syn_dir)
    files_ID = _list_jpgs(ID_dir)

    print("Syn_list: {}, ID_list: {}".format(len(files_syn), len(files_ID)))

    data, labels = [], []
    for directory, filenames, label in ((syn_dir, files_syn, 1), (ID_dir, files_ID, 0)):
        for filename in filenames:
            data.append(_read_image(join(directory, filename)))
            labels.append(label)

    return np.array(data), np.array(labels)

In [9]:
BATCH_SIZE = 8
EPOCHS = 4  # fine-tuning epochs per leave-one-out fold
GENERAL_DIR = r"H:\Genetica Projecten\Facial Recognition\Studenten en Onderzoekers\Fien"

# load img data
syn_list = ['ADNP', 'ANKRD11', 'CDK13', 'DEAF1', 'DYRK1A', 'EHMT1', 'FBXO11', 'SON', 'WAC', 'YY1', 'KDVS']

# `with` guarantees the results file is flushed and closed even when the
# (long-running) loop is interrupted or raises.
with open("results/deepface_classification.txt", "w") as results_file:
    # NOTE(review): only 'YY1' is evaluated although syn_list holds every
    # syndrome -- presumably a debugging restriction; confirm before a full run.
    for syn in ['YY1']:
        data, labels = load_data(syn, GENERAL_DIR)

        n_patients = int(np.sum(labels == 1))
        n_controls = int(np.sum(labels == 0))
        results_file.write("Syndrome {} with {} patients and {} controls\n".format(syn, n_patients, n_controls))
        all_y, all_probs, all_preds = [], [], []

        # Leave-one-out: every image is the single test sample exactly once,
        # so the number of folds equals the number of images.
        loo = LeaveOneOut()
        for train_index, test_index in tqdm(loo.split(data), total=len(data)):
            start = time.time()

            # Index-array selection already returns fresh arrays.
            X_train, X_test = data[train_index], data[test_index]
            y_train, y_test = labels[train_index], labels[test_index]

            # A fresh model per fold so no fold sees weights tuned on its test image.
            model = create_deepface()
            model.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, epochs=EPOCHS, shuffle=True)

            y_pred_array = model.predict(X_test)
            y_pred = tf.math.argmax(y_pred_array, -1).numpy()

            all_y.append(y_test[0])
            all_probs.append(y_pred_array[0][1])  # predicted probability of class 1 (patient)
            # Store the scalar prediction (not the 1-element array) so that
            # all_preds has the same flat layout as all_y.
            all_preds.append(y_pred[0])

            # Release the graph/state of this fold's model before building the next one.
            tf.keras.backend.clear_session()
            end = time.time()
            print("This iteration took {} minutes".format((end-start)/60.0))

        aroc = roc_auc_score(all_y, all_probs, labels=[0,1])
        tn, fp, fn, tp = confusion_matrix(all_y, all_preds, labels=[0,1]).ravel()
        spec = tn / (tn+fp)
        sens = tp / (tp+fn)

        results_file.write("AROC: {:.4f}, spec: {:.4f}, sens: {:.4f}\n\n".format(aroc, spec, sens))
        # Persist each syndrome's result immediately; later syndromes take hours.
        results_file.flush()




Syn_list: 10, ID_list: 10


0it [00:00, ?it/s]

Train on 19 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


1it [07:04, 424.14s/it]

This iteration took 7.068975106875102 minutes
Train on 19 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


2it [12:59, 403.47s/it]

This iteration took 5.920936556657155 minutes
Train on 19 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


3it [20:03, 409.54s/it]

This iteration took 7.06177499294281 minutes
Train on 19 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4


3it [25:51, 517.04s/it]


KeyboardInterrupt: 