imports

In [None]:
from google.colab import drive

drive.mount('/content/drive')

%cd '/content/drive/My Drive/Colab Notebooks'

!pip install import-ipynb

import import_ipynb

from bce_loss_functions import *

from cce_loss_functions import *

from custom_metrics import *

from custom_data_loader import custom_image_dataset_from_directory

from fairface_data_loader import column_clean_up, get_labels

import pandas as pd

import tensorflow as tf

from tensorflow import keras

from keras import layers

from keras.models import Model

constants and variables

In [None]:
# Every project path lives under this folder on the mounted Google Drive.
_PROJECT_ROOT = '/content/drive/MyDrive/Colab Notebooks/s2_dissertation'
_DATA_ROOT = _PROJECT_ROOT + '/data/fairface'
_MODEL_ROOT = _PROJECT_ROOT + '/models'

# Label CSV for each data split, keyed as '<SPLIT>_LABELS'.
FILEPATHS = dict(
    TRAIN_LABELS=_DATA_ROOT + '/split/train/train_labels_dir_ordered.csv',
    TEST_LABELS=_DATA_ROOT + '/split/test/test_labels_dir_ordered.csv',
    VAL_LABELS=_DATA_ROOT + '/split/validation/validation_labels_dir_ordered.csv',
)

# Output folders for models / checkpoints / metrics, plus the image folder of
# each split keyed as '<SPLIT>_IMGS'. All values keep their trailing slash
# because file names are appended by plain string concatenation.
DIRECTORIES = dict(
    MODELS=_MODEL_ROOT + '/fairface/',
    MODEL_CP=_MODEL_ROOT + '/fairface.mcp/',
    OUTPUT=_PROJECT_ROOT + '/output/fairface/',
    TRAIN_IMGS=_DATA_ROOT + '/train/',
    VAL_IMGS=_DATA_ROOT + '/validation/',
    TEST_IMGS=_DATA_ROOT + '/test/',
)

# Supported grouping categories and prediction targets.
CATEGORIES = ['race', 'race-sex']
TARGETS = ['adult', 'race', 'race-sex', 'sex']

# Class count per target; the two targets listed with a single class are the
# ones treated as binary below.
NUMBER_OF_CLASSES = {
    'adult': 1,
    'race': 7,
    'race-sex': 14,
    'sex': 1,
}

# Verbosity level passed to model.fit.
VERBOSE = 1

# Run configuration: which category and which target this notebook run uses.
category = 'race-sex'
target = 'sex'

# True when the selected target is one of the two binary targets.
binary = target in ('adult', 'sex')


data and preprocessing functions

In [None]:
from keras.applications.resnet_v2 import ResNet50V2, preprocess_input

# Augmentation layers used only on the training split. They are created once
# at module level, each with a fixed seed, and reused by preprocess_training.
h_flip = keras.layers.RandomFlip('horizontal', seed=1127)

rotate = keras.layers.RandomRotation(factor=0.2, seed=8675)

def preprocess(images, labels=None):
    """Apply the ResNetV2 input preprocessing to a batch, without augmentation.

    Args:
        images: image batch handed over by ``dataset.map``.
        labels: labels paired with the images; passed through unchanged.

    Returns:
        Tuple ``(preprocessed_images, labels)``.
    """
    prepared = preprocess_input(images)

    return prepared, labels

def preprocess_training(images, labels=None):
    """Preprocess a training batch and apply random augmentation.

    The images go through the ResNetV2 input preprocessing first, then the
    module-level ``rotate`` layer, then the module-level ``h_flip`` layer.
    Both layers are called with ``training=True`` so the augmentation is
    active even though this runs inside ``dataset.map`` rather than a model.

    Args:
        images: image batch handed over by ``dataset.map``.
        labels: labels paired with the images; passed through unchanged.

    Returns:
        Tuple ``(augmented_images, labels)``.
    """
    augmented = h_flip(
        rotate(preprocess_input(images), training=True),
        training=True,
    )

    return augmented, labels

def get_dataset(data_type='TRAIN',shuffle=True):
    """Build the batched, preprocessed image dataset for one data split.

    Reads the split's label CSV, derives labels for the notebook-level
    ``category`` / ``target`` pair, loads the images from the split's folder
    and maps the matching preprocessing function over the result.

    Args:
        data_type: split name; ``'<data_type>_LABELS'`` must be a key of
            ``FILEPATHS`` and ``'<data_type>_IMGS'`` a key of ``DIRECTORIES``
            (the notebook defines 'TRAIN', 'TEST' and 'VAL').
        shuffle: forwarded to ``custom_image_dataset_from_directory``.

    Returns:
        The mapped dataset. 'TRAIN' uses ``preprocess_training`` (with
        augmentation); every other split uses ``preprocess``.

    Raises:
        KeyError: if ``data_type`` has no entry in the path tables.
    """
    print('category:\t{}\ntarget:\t{}\ndata: {}'.format(category,target,data_type))

    labels_key = '{}_LABELS'.format(data_type)
    images_key = '{}_IMGS'.format(data_type)

    # column_clean_up returns the cleaned frame plus a per-category name lookup.
    label_frame, category_names = column_clean_up(pd.read_csv(FILEPATHS[labels_key]))

    labels = get_labels(
        label_frame,
        category=category,
        target=target,
        number_of_classes=NUMBER_OF_CLASSES[target],
        c_names=category_names[category],
        verbose=False,
    )

    # The loader returns a 2-tuple; only the dataset (first item) is used here.
    dataset, _ = custom_image_dataset_from_directory(
        DIRECTORIES[images_key],
        labels=labels,
        color_mode='rgb',
        batch_size=32,
        image_size=(200, 200),
        shuffle=shuffle,
        interpolation='bilinear',
    )

    if data_type == 'TRAIN':
        return dataset.map(preprocess_training)

    return dataset.map(preprocess)

get data

In [None]:
# Load the three splits in the order TRAIN, TEST, VAL; only the training
# split is shuffled. The generator is consumed left to right by the tuple
# unpacking, so the datasets are built in exactly that order.
train_data, test_data, val_data = (
    get_dataset(data_type=split, shuffle=(split == 'TRAIN'))
    for split in ('TRAIN', 'TEST', 'VAL')
)

category:	race-sex
target:	sex
data: TRAIN
Found 69396 files
category:	race-sex
target:	sex
data: TEST
Found 17348 files
category:	race-sex
target:	sex
data: VAL
Found 10954 files


function for model creation, training, and initial evaluation

In [None]:
def _print_final_epoch(history, accuracy_key, is_binary):
    """Print the last-epoch training values stored in a Keras ``History``.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict maps
            metric names to per-epoch lists of values.
        accuracy_key: name of the first compiled metric (reported as 'acc').
        is_binary: when true, the 'custom_true_positives',
            'custom_false_positives', 'custom_true_negatives' and
            'custom_false_negatives' entries are printed as well.
    """
    log = history.history

    print('acc:\t',log[accuracy_key][-1])

    print('loss:\t',log['loss'][-1])

    if is_binary:

        for label, key in (
            ('tp', 'custom_true_positives'),
            ('fp', 'custom_false_positives'),
            ('tn', 'custom_true_negatives'),
            ('fn', 'custom_false_negatives'),
        ):

            print('{}:\t'.format(label),log[key][-1])


def create_model(model_name=None,loss=None):
    """Build, train, fine-tune, evaluate and log one ResNet50V2 classifier.

    Steps:
      1. Build an ImageNet-initialised ResNet50V2 backbone with a pooling,
         dropout and dense head sized for the notebook-level ``target``.
      2. Train for 15 epochs with the backbone frozen except for its
         BatchNormalization layers, then save ``<MODEL_CP><model_name>_1.keras``.
      3. Unfreeze the whole backbone, recompile with a 10x smaller learning
         rate, train for another 15 epochs and save
         ``<MODELS><model_name>_2.keras``.
      4. Evaluate on ``val_data`` and ``test_data`` and append one row to the
         metrics CSV in ``DIRECTORIES['OUTPUT']``.

    Reads the notebook-level names ``target``, ``category``, ``train_data``,
    ``val_data``, ``test_data``, ``DIRECTORIES``, ``NUMBER_OF_CLASSES`` and
    ``VERBOSE``.

    Args:
        model_name: identifier used in every checkpoint / model file name and
            as the first CSV column. Required despite the ``None`` default.
        loss: loss object passed to ``model.compile`` for both phases.

    Returns:
        None. Results are printed, saved to disk and appended to the CSV.

    Raises:
        ValueError: if ``model_name`` is None.
    """
    # Fail before any training: the original only broke on the first
    # ``DIRECTORIES[...] + model_name`` concatenation, after 15 epochs.
    if model_name is None:

        raise ValueError('model_name is required: it is used to build the checkpoint, model and log file entries')

    # Same rule as the module-level ``binary`` flag: both single-output
    # targets are binary. (Previously only 'sex' was recognised here, so
    # 'adult' got a softmax head and a CSV row that did not match the header.)
    is_binary = target in ('adult', 'sex')

    input_shape = (200, 200, 3)

    inputs = keras.Input(shape=input_shape)

    base_model = ResNet50V2(
        include_top=False,
        input_shape=input_shape,
        input_tensor=inputs,
        weights='imagenet',
    )

    base_model.trainable = False  # freeze all base model (ResNet50V2) layers to preserve the imagenet weights for transfer learning

    # unfreeze only the batch normalization layers so they can learn the new data's batch means and variances

    for layer in base_model.layers:

        if 'BatchNormalization' in layer.__class__.__name__:

            layer.trainable = True

    x = base_model.output

    x = layers.GlobalAveragePooling2D()(x) # add global average pooling layer to model

    x = layers.Dropout(0.7)(x) # add dropout to model

    # final layer for either binary or categorical classification; the
    # categorical width follows NUMBER_OF_CLASSES so it always agrees with
    # the CCA metric below (it used to be hard-coded to 7)
    if is_binary:

        output = layers.Dense(1, activation='sigmoid')(x)

    else:

        output = layers.Dense(NUMBER_OF_CLASSES[target], activation='softmax')(x)

    model = Model(inputs = base_model.input, outputs = output)

    model.summary(show_trainable=True)

    metrics = [CBA(),CTP(),CFP(),CTN(),CFN()] if is_binary else [CCA(number_of_classes=NUMBER_OF_CLASSES[target])] # custom metrics

    accuracy_key = metrics[0].name

    # The same callback objects are reused for both training phases.
    callbacks = [
                keras.callbacks.ReduceLROnPlateau(
                    monitor='val_loss',
                    factor=0.1,
                    patience=5,
                    verbose=1,
                    mode='auto',
                    min_delta=1e-4,
                    cooldown=0,
                    min_lr=0
                ),
                keras.callbacks.ModelCheckpoint(
                    DIRECTORIES['MODEL_CP'] + '{}_'.format(model_name) + '{epoch:02d}-{val_loss:.2f}.keras',
                    monitor='val_{}'.format(accuracy_key), # monitor validation accuracy since loss will be impacted negatively due to modified loss function
                    verbose=1,
                    save_best_only=True,
                    save_weights_only=False,
                    mode='max', # accuracy: higher is better; 'auto' would depend on a name heuristic for this custom metric
                    save_freq='epoch',
                    initial_value_threshold=None
                )
    ]

    # ---- phase 1: train the head and the BatchNormalization layers ----

    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001,weight_decay=None), loss=loss, metrics=metrics)

    history = model.fit(train_data, validation_data=val_data, epochs=15, verbose=VERBOSE, callbacks=callbacks)

    base_model.trainable = True  # unfreeze all base model (ResNet50V2) layers for fine tuning the model

    model.save(DIRECTORIES['MODEL_CP'] + model_name + '_1.keras')

    _print_final_epoch(history, accuracy_key, is_binary)

    # ---- phase 2: fine-tune the whole network ----

    model.compile(optimizer=keras.optimizers.Adam(0.0001,weight_decay=0.0),loss=loss,metrics=metrics) # reduce learning rate for fine tuning

    history = model.fit(train_data, validation_data=val_data, epochs=15, verbose=VERBOSE, callbacks=callbacks)

    model.save(DIRECTORIES['MODELS'] + model_name + '_2.keras')

    _print_final_epoch(history, accuracy_key, is_binary)

    # evaluate the model on validation and test data, then log results

    val_results = model.evaluate(val_data)

    print(val_results)

    test_results = model.evaluate(test_data)

    print(test_results)

    fn = DIRECTORIES['OUTPUT'] + 'fairface_dir_model_metrics_t-{}_c-{}.csv'.format(target,category)

    # One row: model name, then loss + one value per compiled metric for the
    # validation data, then the same for the test data (6 + 6 values for the
    # binary metrics, 2 + 2 for the categorical one).
    values_per_split = 1 + len(metrics)

    row = [model_name, *val_results[:values_per_split], *test_results[:values_per_split]]

    # Context manager so the handle is closed even if the write fails.
    with open(fn, 'a') as f:

        f.write(','.join(str(value) for value in row) + '\n')



create and train single group risk modified loss models

In [None]:
# Train one model per (alpha, loss variant) pair for the single group-risk
# losses, and start a fresh metrics CSV for this category / target.
print('category:\t{}\ntarget:\t{}'.format(category,target))

fn = DIRECTORIES['OUTPUT'] + 'fairface_dir_model_metrics_t-{}_c-{}.csv'.format(target,category)

# Mode 'w' truncates any earlier results file; create_model() then appends
# one row per trained model to this same path.
if binary:

    header = 'model,val_loss,val_acc,val_tp,val_fp,val_tn,val_fn,test_loss,test_acc,test_tp,test_fp,test_tn,test_fn\n'

else:

    header = 'model,val_loss,val_acc,test_loss,test_acc\n'

# Context manager so the handle is closed even if the write fails.
with open(fn, 'w') as f:

    f.write(header)

for alpha in [0,0.5,0.9]:

    print('\n\nalpha value:\t\t',alpha)

    # Loss objects are rebuilt for every alpha; the dict key becomes the
    # prefix of the model name.
    if binary:

        losses = {
            'v1:MGR_Equity_BCE':MaxGroupRiskBCEv1(alpha=alpha),
            'v2:MGR_Equality_BCE':MaxGroupRiskBCEv2(alpha=alpha),
            'v1:ASD_Equity_BCE':AbSumDiffBCEv1(alpha=alpha),
            'v2:ASD_Equality_BCE':AbSumDiffBCEv2(alpha=alpha),
        }

    else:

        losses = {
            'v1:MGR_Equity_CCE':MaxGroupRiskCCEv1(alpha=alpha,number_of_classes=NUMBER_OF_CLASSES[target]),
            'v1:ASD_Equity_CCE':AbSumDiffCCEv1(alpha=alpha,number_of_classes=NUMBER_OF_CLASSES[target])
        }


    for ver,loss in losses.items():

        model_name = '{}_{}_'.format(ver,alpha)

        print('\n\ncreating {} alpha = {} loss function model'.format(ver,alpha))

        create_model(model_name + 'fairface_dir_c-{}_t-{}'.format(category,target),loss)

create and train combo group risk modified loss models

In [None]:
# Train one model per (alpha, beta, loss variant) triple for the combined
# group-risk losses. create_model() appends each result row to the metrics
# CSV; this cell does not write a header itself.
print('category:\t{}\ntarget:\t{}'.format(category,target))

for alpha in [0.5,0.9]:

    for beta in [0.25,0.5,0.75]:

        # Fixed: the values used to be passed as extra print() arguments, so
        # the literal '{} - {}' was printed instead of being filled in.
        print('\n\nalpha-beta value:\t\t{} - {}'.format(alpha,beta))

        # Loss objects are rebuilt for every (alpha, beta); the dict key
        # becomes the prefix of the model name.
        if binary:

            losses = {
                'v3:MGR_Equity_BCE':MaxGroupRiskBCEv3(alpha=alpha,beta=beta),
                'v3:ASD_Equity_BCE':AbSumDiffBCEv3(alpha=alpha,beta=beta),
                'v4:MGR_ASD_BCE':MaxAbSumDiffBCEv1(alpha=alpha,beta=beta),
                'v5:MGR_ASD_BCE':MaxAbSumDiffBCEv2(alpha=alpha,beta=beta),
                'v6:MGR_ASD_BCE':MaxAbSumDiffBCEv3(alpha=alpha,beta=beta),
                'v7:MGR_ASD_BCE':MaxAbSumDiffBCEv4(alpha=alpha,beta=beta)
            }

        else:

            losses = {
                'v4:MGR_ASD_CCE':MaxAbSumDiffCCEv1(alpha=alpha,beta=beta,number_of_classes=NUMBER_OF_CLASSES[target]),
            }

        for ver,loss in losses.items():

            model_name = '{}_{}_{}_'.format(ver,alpha,beta)

            print('\n\ncreating {} alpha = {} beta = {} loss function model'.format(ver,alpha,beta))

            create_model(model_name + 'fairface_dir_c-{}_t-{}'.format(category,target),loss)