# Preparation

## Imports

In [1]:
# TensorFlow
import tensorflow as tf
import tensorboard as tb
from tensorflow.keras.layers import Input, Layer, Conv2D, MaxPool2D, Flatten, Dense
# Data handling
import pandas as pd
import numpy as np
# Plotting
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import seaborn as sns
# Other stuff
import itertools
import datetime
import time
import pickle
import functools
import json
import sys
import os
import shutil

## Paths

In [2]:
# Base path under which the project repository is checked out.
# Defaults to the original location, but can be overridden per machine through
# the TWRDS_BASE_PATH environment variable instead of editing the notebook.
base_path = os.environ.get("TWRDS_BASE_PATH") or "C:/repos/"
# All paths below are built by plain string concatenation, so the base path
# has to end with a separator.
if not base_path.endswith(("/", "\\")):
    base_path += "/"
# Make the twrds_unbiased_anns package importable; re-running this cell must
# not add the same entry again.
if base_path not in sys.path:
    sys.path.append(base_path)

# Path where configs are stored
configs_path = base_path + "twrds_unbiased_anns/configs/"

# Path to store runs
runs_path = base_path + "twrds_unbiased_anns/runs/"

# Path to store run results
results_path = base_path + "twrds_unbiased_anns/runs/results/"

## Functions

In [3]:
# Import of functions
# Sample creation
from twrds_unbiased_anns.src.data.samples import create_sample_array, get_sample_data, get_sample_params, convert_sample_to_np_array, gen_from_sample, dataset_from_gen
# Evaluation
from twrds_unbiased_anns.src.data.eval import load_eval_samples, evaluate_performance, evaluate_performance_class, evaluate_model, store_results
# Models
from twrds_unbiased_anns.src.tf.models import get_model
# Losses
from twrds_unbiased_anns.src.tf.losses import get_loss
# Optimizers
from twrds_unbiased_anns.src.tf.optimizers import get_optimizer
# Utils
from twrds_unbiased_anns.src.utils import load_config_from_file, mkdir, rmdir

# Config

In [4]:
# Name of this run; used as the directory name below runs_path
run_name = "grad_fair_selection_base"

# Manual Config
# Template for log/sample names; the placeholders are filled with the lambda
# value and the target loss weight in the training cell
name = "fair_grad_lambda_{}_target_weight_{}"
# File name of the evaluation samples, looked up in twrds_unbiased_anns/data/eval/
eval_sample_filename = "eval_500_mean_100_std_20.data"

# Model Config
# Model identifier handed to get_model
modelname = "GRAD" 
# Weight of the target loss, handed to get_model
target_loss_weight = 0.001
# Number of attributes, handed to get_model
n_attributes = 2

# Data Config
# Size of the training sample; also used to derive the steps per epoch
dataset_size = 400
# Colors handed to dataset_from_gen
colors = ["red"]
# m_diff, std, share and category are handed to get_sample_params
# NOTE(review): names suggest mean difference / standard deviation / share of the
# sample distribution - confirm against get_sample_params
m_diff = 50
std = 3
share = 50
category = "color"

# Training Config
# Number of models trained per lambda value
repeats_per_model = 3
batch_size = 16 
n_epochs = 50
# NOTE(review): not referenced by the cells visible in this notebook - confirm it is still needed
optimizer = "Adam"

# Everything that belongs to this run is stored below runs_path/<run_name>
run_dir = f"{runs_path}{run_name}"

# Date stamp of today in day-month-year form
cur_date = datetime.datetime.today()
date_str = f"{cur_date:%d-%m-%Y}"

# Sub-directories for the TensorBoard logs and the model checkpoints
log_base_dir = f"{run_dir}/logs/"
ckp_base_dir = f"{run_dir}/checkpoints/"

# Create the run directory first, then its sub-directories
# (delete = False is handed to mkdir unchanged for every directory)
mkdir(run_dir, delete = False)
mkdir(log_base_dir, delete = False)
mkdir(ckp_base_dir, delete = False)

[WinError 183] Eine Datei kann nicht erstellt werden, wenn sie bereits vorhanden ist: 'C:/repos/twrds_unbiased_anns/runs/grad_fair_selection_base'
[WinError 183] Eine Datei kann nicht erstellt werden, wenn sie bereits vorhanden ist: 'C:/repos/twrds_unbiased_anns/runs/grad_fair_selection_base/logs/'
[WinError 183] Eine Datei kann nicht erstellt werden, wenn sie bereits vorhanden ist: 'C:/repos/twrds_unbiased_anns/runs/grad_fair_selection_base/checkpoints/'


# Callbacks

In [5]:
# Read the evaluation samples that the fairness check below works on
eval_samples = load_eval_samples(f"{base_path}twrds_unbiased_anns/data/eval/{eval_sample_filename}")

def check_validation_fairness(model):
    """Check whether the model's mean predictions on the evaluation samples are acceptable.

    Runs evaluate_model on the notebook-level eval_samples, builds a dataframe
    from the collected results and averages the predictions for every
    combination of shape_color and shape_type.

    Args:
        model: Model that is handed to evaluate_model.

    Returns:
        True if every group mean lies strictly between 80 and 120, False otherwise.
    """
    # List handed to evaluate_model; the dataframe below is built from it
    collected_rows = []
    # Dictionary with model information
    model_info = {
        "modelname": "fair_grad",
    }
    # Run eval for the red shapes
    evaluate_model(model, eval_samples, model_info, collected_rows, ["red"])
    # Mean prediction per color/shape combination
    group_means = pd.DataFrame(collected_rows).groupby(["shape_color", "shape_type"]).prediction.mean()
    # Every single group has to be inside the accepted range
    return all(80 < group_mean < 120 for group_mean in group_means)

# Callback that saves models which cannot predict the attributes but still predict the target well
class CombineMetricsAndSave(tf.keras.callbacks.Callback):
    """Keras callback that stores the model after epochs in which it looks "fair".

    After every epoch the accuracies of the two attribute outputs are read from
    the logs. If both are below 0.6 and check_validation_fairness accepts the
    model, the model is saved below ckp_base_dir.

    The file name is built from the notebook-level variables lambda_val and
    repeat, which are set by the training loop, plus the epoch index.
    """

    def __init__(self,**kargs):
        super(CombineMetricsAndSave,self).__init__(**kargs)

    def on_epoch_begin(self, epoch, logs = None):
        """Nothing to do at the start of an epoch."""
        return

    def on_epoch_end(self, epoch, logs = None):
        """Save the model if the attribute accuracies and the fairness check allow it.

        Args:
            epoch: Index of the epoch that just finished; becomes part of the file name.
            logs: Metric results of the epoch. Has to contain
                "attr_0_output_accuracy" and "attr_1_output_accuracy".

        Raises:
            KeyError: If one of the attribute accuracies is missing from logs.
        """
        # A None default instead of a shared mutable dict; an empty dict still
        # raises the KeyError that points at the missing metric
        logs = {} if logs is None else logs
        # Only consider models that cannot tell the attributes apart
        if logs["attr_0_output_accuracy"] < 0.6 and logs["attr_1_output_accuracy"] < 0.6:
            # Check if predictions are in acceptable range
            if check_validation_fairness(self.model):
                # Save model and inform user
                print("Found valid model. Saving model ...")
                # lambda_val and repeat are only read here, so no global statement is needed
                model_name = "grad_fair_{}_{}_{}".format(lambda_val, repeat, epoch)
                self.model.save(ckp_base_dir + model_name)

# Model

In [9]:
from tensorflow.keras.layers import BatchNormalization

# Identity in the forward pass, negated gradient in the backward pass
@tf.custom_gradient
def grad_reverse(x):
    """Return x unchanged together with a gradient function that flips the sign.

    Args:
        x: Input tensor.

    Returns:
        Tuple of the identity of x and the function that maps the incoming
        gradient dy to -dy, as expected by tf.custom_gradient.
    """
    def reversed_gradient(upstream):
        # Hand the incoming gradient back with the opposite sign
        return -upstream
    return tf.identity(x), reversed_gradient

# Layer that reverses the gradient
class GradReverse(tf.keras.layers.Layer):
    """Keras layer that applies grad_reverse to its input.

    The layer has no weights; it only wraps grad_reverse so that the operation
    can be used inside a Keras model.
    """

    def __init__(self, **kwargs):
        # Forward the standard layer arguments (name, dtype, trainable, ...).
        # Without them the layer cannot be named and cannot be rebuilt from its
        # config, because the default from_config calls cls(**config).
        super().__init__(**kwargs)

    def call(self, x):
        """Return grad_reverse applied to x."""
        return grad_reverse(x)

def get_grad_model_alt(n_attributes, attr_loss_weight, target_loss_weight = 1., classification = False):
    """Build and compile a CNN with one target branch and gradient-reversed attribute branches.

    A shared convolutional feature extractor feeds a target branch and
    n_attributes attribute branches. Every attribute branch ends in a sigmoid
    output and contains a GradReverse layer between its hidden layers and its
    output layer.

    Args:
        n_attributes: Number of attribute branches to create.
        attr_loss_weight: Total loss weight of the attribute branches; it is
            split evenly over the branches.
        target_loss_weight: Loss weight of the target output.
        classification: If True the target output is a sigmoid trained with
            binary cross-entropy, otherwise a linear output trained with mean
            squared error.

    Returns:
        The compiled tf.keras.Model with outputs "target_output" and
        "attr_<i>_output" for every attribute branch.

    Raises:
        ZeroDivisionError: If n_attributes is 0.
    """
    # Input
    model_input = Input(shape=(360, 360, 3), name="input_img")
    # Feature extractor
    x = Conv2D(32, (3, 3), padding = "same", activation = "relu", name = "conv_1")(model_input)
    x = MaxPool2D(pool_size = (2, 2), name = "pool_1")(x)
    x = Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "conv_2")(x)
    x = MaxPool2D(pool_size = (2, 2), name = "pool_2")(x)
    x = Conv2D(64, (3, 3), padding = "same", activation = "relu", name = "conv_3")(x)
    x = Flatten(name = "flat_1")(x)
    # Target branch
    x_target = Dense(100, activation="relu", name = "target_dense_1")(x)
    x_target = Dense(20, activation="relu", name = "target_dense_2")(x_target)
    if classification:
        target_output = Dense(1, activation = "sigmoid", name = "target_output")(x_target)
    else:
        target_output = Dense(1, name = "target_output")(x_target)
    model_outputs = [target_output]
    # Create losses, weights and metrics
    if classification:
        losses = {"target_output": tf.keras.losses.BinaryCrossentropy()}
        target_metric = "accuracy"
    else:
        losses = {"target_output": tf.keras.losses.MeanSquaredError()}
        target_metric = "mean_squared_error"
    weights = {"target_output": target_loss_weight}
    metrics = {"target_output": target_metric}
    # Split attribute loss over branches
    attr_branch_weight = attr_loss_weight / n_attributes
    # Attribute branches
    for i in range(n_attributes):
        # Hidden layers of the attribute branch, fed directly by the shared features.
        # A GradReverse layer used to be created on x at this point, but its
        # output was overwritten without being used, so it never was part of
        # the model. It is removed; wiring it in together with the reversal
        # below would flip the gradient twice.
        x_attr = Dense(100, activation="relu", name = "attr_{}_dense_1".format(i))(x)
        x_attr = Dense(20, activation="relu", name = "attr_{}_dense_2".format(i))(x_attr)
        # Gradient reversal layer in front of the attribute output
        # NOTE(review): placed here the reversal also affects the hidden layers of
        # the attribute branch, not only the shared feature extractor - confirm
        # this placement is the intended "alt" variant
        x_attr = GradReverse()(x_attr)
        # Funnel into sigmoid for binary classification of attributes
        output_name = "attr_{}_output".format(i)
        attr_output = Dense(1, activation = "sigmoid", name = output_name)(x_attr)
        # Add to outputs
        model_outputs.append(attr_output)
        # Add to losses, weights and metrics
        losses[output_name] = tf.keras.losses.BinaryCrossentropy()
        weights[output_name] = attr_branch_weight
        metrics[output_name] = "accuracy"
    # Make model
    model = tf.keras.Model(inputs = model_input, outputs = model_outputs, name = "GRAD_CNN")
    # Compile model
    model.compile(optimizer = tf.keras.optimizers.Adam(), loss = losses, loss_weights = weights, metrics = metrics)
    # Return finished model
    return model

# Training

## TensorBoard Setup

In [7]:
# Optional clean-up, left disabled: removes TensorBoard's temporary info directory.
# The path is specific to one machine and has to be adjusted before enabling the line.
#rmdir("C:/Users/lucas/AppData/Local/Temp/.tensorboard-info")
# (Re)load the TensorBoard notebook extension
%reload_ext tensorboard
# Show TensorBoard for the log directory of this run
# ($log_base_dir is replaced by the value of the Python variable)
%tensorboard --logdir=$log_base_dir

Reusing TensorBoard on port 6006 (pid 9220), started 0:01:29 ago. (Use '!kill 9220' to kill it.)

In [8]:
# List that collects the results of all models
results = []

# Evaluation samples; the fairness check reads this notebook-level variable
eval_samples = load_eval_samples(f"{base_path}twrds_unbiased_anns/data/eval/{eval_sample_filename}")

# Number of batches that make up one epoch
n_steps = int(dataset_size/batch_size)

# Start from a clean Keras session; it is cleared again before every model
tf.keras.backend.clear_session()

# Parameters of the four sample groups
white_square, white_circle, colorful_square, colorful_circle = get_sample_params(category, m_diff, std, share)

# Draw the training sample and store it in the run directory.
# The sample is shared by all lambda values; its file name uses the value 1.
train_sample = create_sample_array(dataset_size, white_square, white_circle, colorful_square, colorful_circle)
cur_name = name.format(1, target_loss_weight)
sample_filename = "{}/sample_{}_{}".format(run_dir, cur_name, date_str)
np.save(file = sample_filename, arr = train_sample)

# Dataset built from the training sample, with color and shape as attributes
data = dataset_from_gen(train_sample, n_epochs, batch_size, colors, attributes = ["color", "shape"])

# One set of models per lambda value (handed to get_model as attr_loss_weight)
for lambda_val in [10,100,1000]:

    cur_name = name.format(lambda_val, target_loss_weight)

    # Train repeats_per_model models for this lambda value
    for repeat in range(1, repeats_per_model + 1):

        # Every model starts with a fresh Keras session
        tf.keras.backend.clear_session()

        # GRAD comes back compiled from get_model, so there is no compile step here
        model = get_model(
            modelname,
            task_type = "reg",
            target_loss_weight = target_loss_weight,
            attr_loss_weight = lambda_val,
            n_attributes = n_attributes,
        )
        # Alternative model defined in this notebook:
        #model = get_grad_model_alt(n_attributes = n_attributes, attr_loss_weight = lambda_val, target_loss_weight = target_loss_weight, classification = False)

        # Separate TensorBoard log directory per lambda value and repeat
        logdir = "{}{}_{}".format(log_base_dir, cur_name, repeat)
        tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = logdir)

        # Train silently; the first callback saves models that pass the fairness check
        model.fit(
            data,
            epochs = n_epochs,
            steps_per_epoch = n_steps,
            verbose = 0,
            callbacks = [CombineMetricsAndSave(), tensorboard_callback],
        )