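1. Load the pretrained DeepFace weights
2. Add a 2-class classification layer (replacing the original classification layer)
3. For each syndrome in syn_list, calculate the statistics (AROC, specificity, sensitivity)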


In [1]:
import keras
import numpy as np
from os import path
from os import listdir
from os.path import isfile, join
from PIL import Image 
from keras.models import Model
import tensorflow
import csv
import pandas as pd
from tqdm import tqdm
import keras.initializers

from sklearn.model_selection import LeaveOneOut
import cv2
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.preprocessing import Normalizer

In [2]:
# Input geometry: width/height are set by the model, three channels for RGB.
IMAGE_SIZE = (152, 152)
CHANNELS = 3

# Width of the pretrained softmax layer; that classification layer will be
# removed once the weights are loaded.
NUM_CLASSES = 8631

# SGD hyper-parameters.
LEARN_RATE = 0.01
MOMENTUM = 0.9

# Location and checksum of the zipped pretrained weights.
DOWNLOAD_PATH = (
    'https://github.com/swghosh/DeepFace/releases/download/'
    'weights-vggface2-2d-aligned/'
    'VGGFace2_DeepFace_weights_val-0.9034.h5.zip'
)
MD5_HASH = '0b21fb70cd6901c96c19ac14c9ea8b89'

In [3]:
def create_classifying_deepface(image_size=IMAGE_SIZE, channels=CHANNELS, num_classes=NUM_CLASSES, learn_rate=LEARN_RATE, momentum=MOMENTUM):
    """
    Build and compile the DeepFace classification network.

    Architecture from "DeepFace: Closing the Gap to Human-Level Performance
    in Face Verification" (Taigman et al., CVPR 2014): a deep CNN intended
    for face recognition, face verification and face representation
    (feature extraction).

    Parameters:
        image_size: (height, width) pair of the input images.
        channels: number of colour channels of the input images.
        num_classes: number of units in the final softmax layer 'F8'.
        learn_rate: learning rate handed to the SGD optimizer.
        momentum: momentum handed to the SGD optimizer.

    Returns:
        A keras Sequential model named 'DeepFace', compiled with SGD,
        categorical cross-entropy loss and the 'accuracy' metric.
    """
    layers = keras.layers
    relu = keras.activations.relu

    # Every trainable layer uses the same weight / bias initialisers.
    shared_init = dict(
        kernel_initializer=keras.initializers.RandomNormal(mean=0, stddev=0.01),
        bias_initializer=keras.initializers.Constant(value=0.5),
    )

    # Small factories that apply the common parameters to each layer type.
    def relu_conv(**kwargs):
        return layers.Conv2D(activation=relu, **shared_init, **kwargs)

    def relu_local(**kwargs):
        return layers.LocallyConnected2D(activation=relu, **shared_init, **kwargs)

    def dense(**kwargs):
        return layers.Dense(**shared_init, **kwargs)

    # Layer stack, in forward order. Names are kept exactly as they are
    # because weight loading and later layer look-ups rely on them.
    stack = [
        layers.InputLayer(input_shape=tuple(image_size) + (channels,), name='I0'),
        relu_conv(filters=32, kernel_size=11, name='C1'),
        layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='M2'),
        relu_conv(filters=16, kernel_size=9, name='C3'),
        relu_local(filters=16, kernel_size=9, name='L4'),
        relu_local(filters=16, kernel_size=7, strides=2, name='L5'),
        relu_local(filters=16, kernel_size=5, name='L6'),
        layers.Flatten(name='F0'),
        dense(units=4096, activation=relu, name='F7'),
        layers.Dropout(rate=0.5, name='D0'),
        dense(units=num_classes, activation=keras.activations.softmax, name='F8'),
    ]
    network = keras.models.Sequential(stack, name='DeepFace')
    # network.summary()

    # Compile for training with SGD and categorical cross-entropy.
    network.compile(
        optimizer=keras.optimizers.SGD(lr=learn_rate, momentum=momentum),
        loss=keras.losses.categorical_crossentropy,
        metrics=['accuracy'],
    )

    return network

In [4]:
def get_weights():
    """
    Download the pretrained DeepFace weights and return the path of the .h5 file.

    The zip archive at DOWNLOAD_PATH is fetched with keras.utils.get_file
    (checked against MD5_HASH, extract=True). The returned path is the
    directory of the downloaded archive joined with the archive's base name
    minus its '.zip' suffix; this assumes the archive is extracted next to
    the downloaded file.

    Returns:
        Path (str) at which the extracted .h5 weights file is expected.
    """
    filename = 'deepface.zip'
    downloaded_file_path = keras.utils.get_file(filename, DOWNLOAD_PATH,
        md5_hash=MD5_HASH, extract=True)

    # str.rstrip('.zip') would strip any trailing run of the characters
    # '.', 'z', 'i', 'p' rather than the suffix, which can eat part of the
    # real file name. Remove the exact suffix instead.
    zip_suffix = '.zip'
    h5_name = path.basename(DOWNLOAD_PATH)
    if h5_name.endswith(zip_suffix):
        h5_name = h5_name[:-len(zip_suffix)]

    return path.join(path.dirname(downloaded_file_path), h5_name)


def create_deepface():
    """
    Build a 2-class DeepFace model initialised with the pretrained weights.

    The full classification network is created and the downloaded weights
    are loaded into it. Its last layer is then dropped: a new 2-unit softmax
    layer named 'predictions' is attached to the output of the second-to-last
    layer.

    Returns:
        A keras Model mapping the original input to the new 2-unit softmax.
        The returned model is NOT compiled; call compile() before training.
    """
    model = create_classifying_deepface()
    weights = get_weights()
    model.load_weights(weights)

    # layers[-2] is the layer just before the original softmax classifier.
    x = model.layers[-2].output
    # `Dense` is not imported by name in this file; reference it through keras.layers.
    x = keras.layers.Dense(2, activation='softmax', name='predictions')(x)
    model2 = Model(model.input, x)

    return model2

In [5]:
def _list_jpgs(directory):
    """Return the names of the regular files in `directory` that end in '.jpg'."""
    return [f for f in listdir(directory)
            if isfile(join(directory, f)) and f.endswith(".jpg")]


def _load_resized_rgb(directory, filenames):
    """Open each file as RGB, resize it to IMAGE_SIZE and return the images as a list."""
    images = []
    for filename in filenames:
        # The context manager closes the underlying file; convert()/resize()
        # return new in-memory images that stay valid afterwards.
        with Image.open(join(directory, filename)) as im:
            images.append(im.convert("RGB").resize(IMAGE_SIZE))
    return images


def load_data(syn, GENERAL_DIR):
    """
    Load patient and control images for one syndrome.

    Images are read from
        <GENERAL_DIR>/<syn>/<syn>-patients              (label 1)
        <GENERAL_DIR>/<syn>/<syn>-selected-ID-controls  (label 0)
    Only '.jpg' files are used. Every image is converted to RGB, so all
    images have the same number of channels, and resized to IMAGE_SIZE.

    Parameters:
        syn: syndrome name, used for the directory names.
        GENERAL_DIR: root directory holding one sub-directory per syndrome.

    Returns:
        (data, labels): a list of PIL images (patients first, then controls)
        and the parallel list of integer labels.

    Raises:
        FileNotFoundError: if one of the two directories does not exist.
    """
    # os.path.join uses the platform separator instead of a hard-coded
    # backslash (the original "\{" was also an invalid escape sequence).
    syn_dir = join(GENERAL_DIR, syn, "{}-patients".format(syn))
    ID_dir = join(GENERAL_DIR, syn, "{}-selected-ID-controls".format(syn))

    # get list of filenames
    files_syn = _list_jpgs(syn_dir)
    files_ID = _list_jpgs(ID_dir)

    print("Syn_list: {}, ID_list: {}".format(len(files_syn), len(files_ID)))

    data = _load_resized_rgb(syn_dir, files_syn) + _load_resized_rgb(ID_dir, files_ID)
    labels = [1] * len(files_syn) + [0] * len(files_ID)

    return data, labels

In [7]:
BATCH_SIZE = 16
GENERAL_DIR = r"H:\Genetica Projecten\Facial Recognition\Studenten en Onderzoekers\Fien"

syn_list = ['ADNP', 'ANKRD11', 'CDK13', 'DEAF1', 'DYRK1A', 'EHMT1', 'FBXO11', 'SON', 'WAC', 'YY1', 'KDVS']

# For every syndrome: run leave-one-out cross-validation (a fresh model is
# fine-tuned per fold and evaluated on the single held-out image), then write
# AROC, specificity and sensitivity to the results file.
# The `with` block guarantees the file is flushed and closed even on error.
with open("results/deepface_classification.txt", "w") as results_file:
    for syn in syn_list:
        # load img data
        data, labels = load_data(syn, GENERAL_DIR)

        # LeaveOneOut yields integer index arrays, which can only index
        # NumPy arrays, so convert the loaded images and labels first.
        # NOTE(review): pixel values are passed on as loaded (no rescaling) --
        # confirm this matches the preprocessing the pretrained weights expect.
        data = np.array([np.asarray(im) for im in data])
        labels = np.array(labels)

        results_file.write("Syndrome {} with {} patients and {} controls\n".format(
            syn, int(np.sum(labels == 1)), int(np.sum(labels == 0))))
        all_y, all_probs, all_preds = [], [], []

        loo = LeaveOneOut()
        # split() is a generator, so tqdm needs the fold count explicitly.
        for train_index, test_index in tqdm(loo.split(data), total=len(data)):
            X_train, X_test = data[train_index], data[test_index]
            y_train, y_test = labels[train_index], labels[test_index]

            # A new model is built in every fold; drop the previous one's
            # global Keras state so memory does not grow across folds.
            keras.backend.clear_session()
            model = create_deepface()
            # The model must be compiled before fit(). Labels are integer
            # class ids (0/1) against a 2-unit softmax, hence the sparse loss.
            model.compile(
                optimizer=keras.optimizers.SGD(lr=LEARN_RATE, momentum=MOMENTUM),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
            model.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, epochs=10, shuffle=True)

            # One held-out sample per fold -> one row of class probabilities.
            y_pred_array = model.predict(X_test)
            y_pred = int(np.argmax(y_pred_array[0]))

            all_y.append(int(y_test[0]))
            all_probs.append(float(y_pred_array[0][1]))  # probability of class 1 (patient)
            all_preds.append(y_pred)

        aroc = roc_auc_score(all_y, all_probs)
        # labels=[0, 1] forces a 2x2 matrix even if one class is never predicted.
        tn, fp, fn, tp = confusion_matrix(all_y, all_preds, labels=[0, 1]).ravel()
        spec = tn / (tn+fp)
        sens = tp / (tp+fn)

        results_file.write("AROC: {:.4f}, spec: {:.4f}, sens: {:.4f}\n\n".format(aroc, spec, sens))




FileNotFoundError: [WinError 3] Het systeem kan het opgegeven pad niet vinden: 'H:\\Genetica Projecten\\Facial Recognition\\Studenten en Onderzoekers\\Fien\\ADNP\\ADNP-patients'