Imports

In [None]:
from google.colab import drive

drive.mount('/content/drive')

%cd '/content/drive/My Drive/Colab Notebooks'

!pip install import-ipynb

import import_ipynb

from bce_loss_functions import *

from cce_loss_functions import *

from custom_metrics import *

from custom_data_loader import custom_image_dataset_from_directory

from fairface_data_loader import column_clean_up, get_labels

import os

import gc

import pandas as pd

import numpy as np

import tensorflow as tf

from tensorflow.python.framework import constant_op

from tensorflow import keras

from keras.applications.resnet_v2 import preprocess_input


constants and variables

In [None]:
# Label CSV files, one per data split. 'CEST_LABELS' is a template: get_dataset
# fills its two placeholders with (race, sex) to pick a single test subgroup.
FILEPATHS = {
    'TRAIN_LABELS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/split/train/train_labels_dir_ordered.csv',
    'TEST_LABELS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/split/test/test_labels_dir_ordered.csv',
    'CEST_LABELS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/split/test/test_labels_{}_{}_dir_ordered.csv',
    'VAL_LABELS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/split/validation/validation_labels_dir_ordered.csv',
}

# Model checkpoints, evaluation output and image folders. 'CEST_IMGS' is a
# template: get_dataset fills its two placeholders with (sex, race).
DIRECTORIES = {
    'MODELS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/models/fairface.bce/',
    'OUTPUT': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/output/fairface/test_eval.bce/',
    'TRAIN_IMGS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/train/',
    'TEST_IMGS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/test/',
    'CEST_IMGS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/test/{}/{}',
    'VAL_IMGS': '/content/drive/MyDrive/Colab Notebooks/s2_dissertation/data/fairface/validation/',
}

# Presumably the valid choices for `category` and `target` below -- neither
# list is read anywhere in the visible code.
CATEGORIES = ['race', 'race-sex']
TARGETS = ['adult', 'race', 'race-sex', 'sex']

# Number of classes per target; get_dataset treats a value below 2 as a
# binary task.
NUMBER_OF_CLASSES = {'adult': 1, 'race': 7, 'race-sex': 14, 'sex': 1}

# Verbosity level passed to model.evaluate.
VERBOSE = 1

# Run configuration: the protected category and the prediction target.
category = 'race-sex'
target = 'sex'

# Binary targets use the BCE models and metrics; everything else uses CCE.
binary = target in ('adult', 'sex')

data and preprocessing functions

In [None]:
def preprocess(images, labels=None):
    """Run the ResNetV2 ``preprocess_input`` transform on a batch of images.

    Args:
        images: image batch handed to ``preprocess_input``.
        labels: optional labels; returned unchanged.

    Returns:
        A ``(preprocessed_images, labels)`` tuple, the shape expected by
        ``dataset.map``.
    """
    processed_images = preprocess_input(images)
    return processed_images, labels

def get_dataset(race='',sex='',standard=True):
    """Build the preprocessed test dataset for one race/sex subgroup.

    Reads the module-level ``category`` and ``target`` settings.

    Args:
        race: race name used to fill the label-CSV and image-directory templates.
        sex: sex name used to fill the same templates.
        standard: when True the labels are the raw ``target`` column
            (one-hot encoded if the target has two or more classes); when
            False the labels are produced by the project helper ``get_labels``.

    Returns:
        A tuple ``(dataset, raw_labels, filenames)`` where ``dataset`` is the
        image dataset mapped through ``preprocess``, ``raw_labels`` is the
        ``target`` column as a list, and ``filenames`` is the ``file`` column
        as a list.
    """
    print('category:\t{}\ntarget:\t{}\ndata: {} {}'.format(category,target,race,sex))

    class_count = NUMBER_OF_CLASSES[target]
    is_binary = class_count < 2

    frame = pd.read_csv(FILEPATHS['CEST_LABELS'].format(race,sex))
    frame, category_names = column_clean_up(frame)

    # Raw target values are always returned, whichever label encoding feeds
    # the dataset itself.
    raw_labels = list(frame[target].copy())

    if standard:
        labels = list(frame[target].copy())
        if not is_binary:
            labels = tf.keras.utils.to_categorical(labels, class_count)
    else:
        labels = get_labels(
            frame,
            category=category,
            target=target,
            number_of_classes=class_count,
            c_names=category_names[category],
            verbose=False,
        )

    # NOTE(review): the label CSV template is filled as (race, sex) but the
    # image directory as (sex, race) -- confirm this matches the folder layout.
    image_dir = DIRECTORIES['CEST_IMGS'].format(sex,race)

    # shuffle=False keeps the images in directory order.
    dataset, _ = custom_image_dataset_from_directory(
        image_dir,
        labels=labels,
        color_mode='rgb',
        batch_size=32,
        image_size=(200, 200),
        shuffle=False,
        interpolation='bilinear'
    )

    return dataset.map(preprocess), raw_labels, list(frame['file'].copy())

testing functions for baseline and modified loss models

In [None]:
def get_binary_classes(y_pred):
    """Threshold predicted scores into 0/1 class labels.

    Args:
        y_pred: iterable of scores (for example the output of ``model.predict``).

    Returns:
        A Python list with 1 where the score is strictly greater than 0.5
        and 0 otherwise.
    """
    classes = []

    for score in list(y_pred):
        classes.append(1 if score > 0.5 else 0)

    return classes

def get_categorical_classes(y_pred):
    """Convert per-class scores into predicted class indices.

    Args:
        y_pred: 2-D array-like of scores, one row per sample.

    Returns:
        A NumPy array holding, for each row, the index of its largest score.
    """
    scores = np.array(y_pred)

    return scores.argmax(axis=1)

def test_standard_model(test_data):
    """Predict with the baseline model and write per-image predictions to CSV.

    Relies on module-level state: ``binary``, ``target`` and ``DIRECTORIES``
    from the configuration cell, and ``race``, ``sex``, ``test_filenames``
    and ``test_labels`` set by the driver loop before each call.

    Args:
        test_data: dataset passed to ``model.predict``.

    Side effects:
        Writes ``test_<model_name>_d_<race>_<sex>.csv`` (columns
        filename,target,prediction) into ``DIRECTORIES['OUTPUT']``.
    """
    # NOTE(review): evaluate_standard_model loads '..._t-{}_2.keras' /
    # '..._t-{}_3.keras' while this function loads '..._t-{}_.keras' --
    # confirm both checkpoints exist and that the difference is intended.
    if binary:
        model_name = 'standard_BCE_fairface_dir_t-{}_.keras'.format(target)
    else:
        model_name = 'standard_CCE_fairface_dir_t-{}_.keras'.format(target)

    model_path = DIRECTORIES['MODELS'] + model_name

    model = keras.models.load_model(model_path)

    predictions = model.predict(test_data)

    predictions = get_binary_classes(predictions) if binary else get_categorical_classes(predictions)

    file_path = DIRECTORIES['OUTPUT'] + 'test_{}_d_{}_{}.csv'.format(model_name,race,sex)

    # The context manager closes the file even if a write raises part-way.
    with open(file_path,'w') as f:

        f.write('filename,target,prediction\n')

        for i in range(len(test_filenames)):

            f.write('{},{},{}\n'.format(test_filenames[i],test_labels[i],predictions[i]))

    # Release the model before the next one is loaded.
    del model

    gc.collect()

def test_model(test_data,model_name,model_path,loss,alpha=0,beta=0):
    """Predict with a modified-loss model and write per-image predictions to CSV.

    Relies on module-level state: ``binary``, ``target`` and ``DIRECTORIES``
    from the configuration cell, and ``race``, ``sex``, ``test_filenames``
    and ``test_labels`` set by the driver loop before each call.

    Args:
        test_data: dataset passed to ``model.predict``.
        model_name: model file name; used in the output CSV name.
        model_path: full path of the saved model to load.
        loss: key selecting the custom loss (for example ``'v1:MGR'``).
        alpha: forwarded to the loss constructors.
        beta: forwarded to the loss constructors that accept it.

    Raises:
        KeyError: if ``loss`` is not a key of the loss table for the current
            task type.

    Side effects:
        Writes ``test_<model_name>_d_<race>_<sex>.csv`` (columns
        filename,target,prediction) into ``DIRECTORIES['OUTPUT']``.
    """
    if binary:

        losses = {
                    'v1:MGR':MaxGroupRiskBCEv1(alpha=alpha),
                    'v2:MGR':MaxGroupRiskBCEv2(alpha=alpha),
                    'v3:MGR':MaxGroupRiskBCEv3(alpha=alpha,beta=beta),
                    'v4:MGR':MaxAbSumDiffBCEv1(alpha=alpha,beta=beta),
                    'v5:MGR':MaxAbSumDiffBCEv2(alpha=alpha,beta=beta),
                    'v6:MGR':MaxAbSumDiffBCEv3(alpha=alpha,beta=beta),
                    'v7:MGR':MaxAbSumDiffBCEv4(alpha=alpha,beta=beta),
                    'v1:ASD':AbSumDiffBCEv1(alpha=alpha),
                    'v2:ASD':AbSumDiffBCEv2(alpha=alpha),
                    'v3:ASD':AbSumDiffBCEv3(alpha=alpha,beta=beta),
                    }

        # Custom objects passed to load_model for the saved binary models.
        custom_objects = {
                          'loss':losses[loss],
                          'custom_binary_accuracy':CBA(),
                          'custom_true_positives':CTP(),
                          'custom_false_positives':CFP(),
                          'custom_true_negatives':CTN(),
                          'custom_false_negatives':CFN()
                          }

    else:

        losses = {
                    'v1:MGR':MaxGroupRiskCCEv1(alpha=alpha),
                    'v1:ASD':AbSumDiffCCEv1(alpha=alpha),
                    'v4:MGR':MaxAbSumDiffCCEv1(alpha=alpha,beta=beta)
                    }

        custom_objects = {
                          'loss':losses[loss],
                          'custom_categorical_accuracy':CCA(number_of_classes=NUMBER_OF_CLASSES[target])
                          }

    model = keras.models.load_model(model_path, custom_objects=custom_objects)

    predictions = model.predict(test_data)

    predictions = get_binary_classes(predictions) if binary else get_categorical_classes(predictions)

    file_path = DIRECTORIES['OUTPUT'] + 'test_{}_d_{}_{}.csv'.format(model_name,race,sex)

    # The context manager closes the file even if a write raises part-way.
    with open(file_path,'w') as f:

        f.write('filename,target,prediction\n')

        for i in range(len(test_filenames)):

            f.write('{},{},{}\n'.format(test_filenames[i],test_labels[i],predictions[i]))

    # Release the loss objects and the model before the next one is loaded.
    del losses
    del custom_objects
    del model

    gc.collect()


evaluation functions for baseline and modified loss models

In [None]:
def evaluate_standard_model(test_data):
    """Evaluate the baseline model and start the evaluation CSV for this subgroup.

    Relies on module-level state: ``binary``, ``category``, ``target``,
    ``VERBOSE`` and ``DIRECTORIES`` from the configuration cell, and ``race``
    and ``sex`` set by the driver loop before each call.

    Args:
        test_data: dataset passed to ``model.evaluate``.

    Side effects:
        Prints the metrics dict and (over)writes
        ``eval_<BCE|CCE>_c-<category>_t-<target>_d_<race>_<sex>.csv`` in
        ``DIRECTORIES['OUTPUT']`` with a header row plus one result row.
        ``evaluate_model`` later appends its rows to the same file.
    """
    if binary:
        model_name = 'standard_BCE_fairface_dir_t-{}_2.keras'.format(target)
    else:
        model_name = 'standard_CCE_fairface_dir_t-{}_3.keras'.format(target)

    model_path = DIRECTORIES['MODELS'] + model_name

    model = keras.models.load_model(model_path)

    results = model.evaluate(test_data, verbose=VERBOSE, return_dict=True)

    print(results)

    if binary:
        fn = 'eval_BCE_c-{}_t-{}_d_{}_{}.csv'.format(category,target,race,sex)
    else:
        fn = 'eval_CCE_c-{}_t-{}_d_{}_{}.csv'.format(category,target,race,sex)

    file_path = DIRECTORIES['OUTPUT'] + fn

    # Mode 'w' resets the file; the context manager closes it even if a
    # metric key is missing and formatting raises.
    with open(file_path,'w') as f:

        if binary:
            f.write('model,loss,accuracy,true_positives,false_positives,true_negatives,false_negatives\n')
            f.write('{},{},{},{},{},{},{}\n'.format(model_name,results['loss'],results['binary_accuracy'],results['true_positives'],results['false_positives'],results['true_negatives'],results['false_negatives']))

        else:
            f.write('model,loss,accuracy\n')
            f.write('{},{},{}\n'.format(model_name,results['loss'],results['categorical_accuracy']))

    # Release the model before the next one is loaded.
    del model

    gc.collect()

def evaluate_model(test_data,model_name,model_path,loss,alpha=0,beta=0):
    """Evaluate a modified-loss model and append its metrics to the evaluation CSV.

    Relies on module-level state: ``binary``, ``category``, ``target``,
    ``VERBOSE`` and ``DIRECTORIES`` from the configuration cell, and ``race``
    and ``sex`` set by the driver loop before each call.

    Args:
        test_data: dataset passed to ``model.evaluate``.
        model_name: model file name; written as the first CSV column.
        model_path: full path of the saved model to load.
        loss: key selecting the custom loss (for example ``'v1:MGR'``).
        alpha: forwarded to the loss constructors.
        beta: forwarded to the loss constructors that accept it.

    Raises:
        KeyError: if ``loss`` is not a key of the loss table for the current
            task type.

    Side effects:
        Prints the metrics dict and appends one row to
        ``eval_<BCE|CCE>_c-<category>_t-<target>_d_<race>_<sex>.csv`` in
        ``DIRECTORIES['OUTPUT']``. No header is written here; the file's
        header row comes from ``evaluate_standard_model``.
    """
    if binary:

        losses = {
                    'v1:MGR':MaxGroupRiskBCEv1(alpha=alpha),
                    'v2:MGR':MaxGroupRiskBCEv2(alpha=alpha),
                    'v3:MGR':MaxGroupRiskBCEv3(alpha=alpha,beta=beta),
                    'v4:MGR':MaxAbSumDiffBCEv1(alpha=alpha,beta=beta),
                    'v5:MGR':MaxAbSumDiffBCEv2(alpha=alpha,beta=beta),
                    'v6:MGR':MaxAbSumDiffBCEv3(alpha=alpha,beta=beta),
                    'v7:MGR':MaxAbSumDiffBCEv4(alpha=alpha,beta=beta),
                    'v1:ASD':AbSumDiffBCEv1(alpha=alpha),
                    'v2:ASD':AbSumDiffBCEv2(alpha=alpha),
                    'v3:ASD':AbSumDiffBCEv3(alpha=alpha,beta=beta),
                    }

        # Custom objects passed to load_model for the saved binary models.
        custom_objects = {
                          'loss':losses[loss],
                          'custom_binary_accuracy':CBA(),
                          'custom_true_positives':CTP(),
                          'custom_false_positives':CFP(),
                          'custom_true_negatives':CTN(),
                          'custom_false_negatives':CFN()
                          }

    else:

        losses = {
                    'v1:MGR':MaxGroupRiskCCEv1(alpha=alpha),
                    'v1:ASD':AbSumDiffCCEv1(alpha=alpha),
                    'v4:MGR':MaxAbSumDiffCCEv1(alpha=alpha,beta=beta)
                  }

        custom_objects = {
                          'loss':losses[loss],
                          'custom_categorical_accuracy':CCA(number_of_classes=NUMBER_OF_CLASSES[target])
                          }

    model = keras.models.load_model(model_path, custom_objects=custom_objects)

    results = model.evaluate(test_data, verbose=VERBOSE, return_dict=True)

    print(results)

    if binary:
        fn = 'eval_BCE_c-{}_t-{}_d_{}_{}.csv'.format(category,target,race,sex)
    else:
        fn = 'eval_CCE_c-{}_t-{}_d_{}_{}.csv'.format(category,target,race,sex)

    file_path = DIRECTORIES['OUTPUT'] + fn

    # Append mode adds to the existing file; the context manager closes it
    # even if a metric key is missing and formatting raises.
    with open(file_path,'a') as f:

        if binary:

            f.write('{},{},{},{},{},{},{}\n'.format(model_name,results['loss'],results['custom_binary_accuracy'],results['custom_true_positives'],results['custom_false_positives'],results['custom_true_negatives'],results['custom_false_negatives']))

        else:

            f.write('{},{},{}\n'.format(model_name,results['loss'],results['custom_categorical_accuracy']))

    # Release the loss objects and the model before the next one is loaded.
    del losses
    del custom_objects
    del model

    gc.collect()


Test and evaluate each model on every race/sex subgroup separately

In [None]:
# The model directory listing and the filename tokens do not depend on the
# subgroup, so compute them once instead of once per (race, sex) pair.
models = os.listdir(DIRECTORIES['MODELS'])

category_dash = 'c-{}'.format(category)

target_dash = 't-{}'.format(target)

for race in ['Black','East Asian','Indian','Latino-Hispanic','Middle Eastern','Southeast Asian','White']:

    for sex in ['Female','Male']:

        # NOTE: `race`, `sex`, `test_labels` and `test_filenames` are read as
        # globals by the test_*/evaluate_* functions -- do not rename them.
        test_data_c, test_labels, test_filenames = get_dataset(race,sex,standard=False)

        test_data_s, _, _ = get_dataset(race,sex,standard=True)

        print('category:\t{}\ntarget:\t{}\ndata:\t{} {}'.format(category,target,race,sex))

        # evaluate and test baseline model
        # (evaluate_standard_model runs first: it creates the eval CSV that
        # evaluate_model appends to)

        evaluate_standard_model(test_data_s)

        test_standard_model(test_data_s)

        # evaluate and test modified loss models

        for model_name in models:

            name_parts = model_name.split('_')

            # Keep only modified-loss models trained for the current
            # category/target combination.
            if target_dash not in name_parts or category_dash not in name_parts or 'standard' in name_parts:

                continue

            print(model_name)

            model_path = DIRECTORIES['MODELS'] + model_name

            # Field 0 of the file name is the loss key and field 3 is alpha.
            # Field 4 is beta, unless it is the literal 'fairface', in which
            # case the model has no beta and the default of 0 is used.
            has_beta = name_parts[4] != 'fairface'

            alpha = float(name_parts[3])

            beta = float(name_parts[4]) if has_beta else 0

            evaluate_model(test_data_c,model_name,model_path, name_parts[0], alpha, beta)

            test_model(test_data_c,model_name,model_path, name_parts[0], alpha, beta)
