In [None]:
import os
import sys
import random
import math
import re
import time
import csv
import copy
import argparse
from pathlib import Path
import seaborn as sns


import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torch_data
from torchvision.utils import make_grid
print(torch.__version__)

# Root directory of the project
ROOT_DIR = os.path.abspath("")
print(ROOT_DIR)

sys.path.append(ROOT_DIR+"/DBVAE/")  # To find local version of the library
from db_vae import DBVAE
import debias_utils
import data_utils
import plot_utils
import ppb_utils

%matplotlib inline 

sns.set()

In [None]:
# Dataset switch: True uses data/faces, False uses the smaller data/faces_small
# (see the TRAIN_DATA_DIR selection below). The flag is also passed to
# data_utils.prepare_datasets.
full_dataset = False

In [None]:
# Download and prepare all datasets
data_utils.prepare_datasets(full_dataset)

In [None]:
# Directories holding the trained models and the CSV result files.
#
# Paths are assembled from components with os.path.join instead of formatting a
# hard-coded "/" into one string. The trailing "" component makes every
# directory end with a path separator, which later cells rely on when they
# build file paths by plain concatenation (e.g. MODELS_DIR + filename).

slash = "/"  # no longer used below; kept in case another cell still references it

RESULTS_DIR = os.path.join(ROOT_DIR, "final_results")

MODELS_DIR_SUM = os.path.join(RESULTS_DIR, "sum", "models", "")
print(MODELS_DIR_SUM)

TRAIN_STATS_DIR_SUM = os.path.join(RESULTS_DIR, "sum", "train_stats", "")
print(TRAIN_STATS_DIR_SUM)

TEST_ACC_DIR_SUM = os.path.join(RESULTS_DIR, "sum", "test_accuracy", "")
print(TEST_ACC_DIR_SUM)

MODELS_DIR_MEAN = os.path.join(RESULTS_DIR, "mean", "models", "")
print(MODELS_DIR_MEAN)

TRAIN_STATS_DIR_MEAN = os.path.join(RESULTS_DIR, "mean", "train_stats", "")
print(TRAIN_STATS_DIR_MEAN)

TEST_ACC_DIR_MEAN = os.path.join(RESULTS_DIR, "mean", "test_accuracy", "")
print(TEST_ACC_DIR_MEAN)

# The training images live in a different folder depending on the dataset switch.
TRAIN_DATA_DIR = os.path.join(ROOT_DIR, "data", "faces" if full_dataset else "faces_small", "")
print(TRAIN_DATA_DIR)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# Load models

In [None]:
def load_models(MODELS_DIR):
    """Load every saved DBVAE checkpoint in a directory, grouped by alpha.

    The alpha is taken from the third underscore-separated part of the file
    name (e.g. ``model_alpha_0.01_v2.pth.tar`` -> ``"0.01"``). Files that do not
    end in ``.pth.tar`` or have fewer than three underscore-separated parts are
    reported and skipped instead of crashing the whole load.

    Args:
        MODELS_DIR: directory containing the checkpoint files; works with or
            without a trailing path separator.

    Returns:
        (models, filenames): two dicts keyed by the alpha string. ``models[alpha]``
        is a list of DBVAE instances in eval mode on the global ``device``;
        ``filenames[alpha]`` holds the matching file names without ``.pth.tar``.
    """
    suffix = ".pth.tar"
    models = {}     # alpha -> list of loaded models
    filenames = {}  # alpha -> list of checkpoint names (suffix removed)

    # Sorted so the order of the models within each alpha is reproducible;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(MODELS_DIR)):
        name_parts = filename.split("_")
        if not filename.endswith(suffix) or len(name_parts) < 3:
            print("Skipping {}: not a model checkpoint".format(filename))
            continue
        alpha = name_parts[2]

        # Rebuild the network and restore the saved weights.
        model = DBVAE(z_dim=100).to(device)
        checkpoint = torch.load(os.path.join(MODELS_DIR, filename), map_location=device)
        model.load_state_dict(checkpoint)
        model.eval()

        models.setdefault(alpha, []).append(model)
        filenames.setdefault(alpha, []).append(filename[:-len(suffix)])

    return models, filenames

In [None]:
# Load all checkpoints from the "sum" results directory, grouped by alpha.
models_sum, filenames_sum = load_models(MODELS_DIR_SUM)
print(filenames_sum)

# Same for the checkpoints in the "mean" results directory.
models_mean, filenames_mean = load_models(MODELS_DIR_MEAN)
print(filenames_mean)

# Select a model

In [None]:
def get_random_model(models, filenames, alpha=None):
    """Pick one already-loaded model at random.

    Args:
        models: dict mapping an alpha key to a list of models.
        filenames: dict with the same keys, mapping to the names of those
            models in the same order.
        alpha: key to draw from. When None, a key is chosen at random first.

    Returns:
        (model, key): the selected model and the alpha key it was drawn from.

    Raises:
        KeyError: if ``alpha`` is given but is not a key of ``models``.
    """
    if alpha is not None:
        if alpha not in models:
            # Same exception type as the plain dict lookup, but with a usable message.
            raise KeyError("No models for alpha {!r}; available alphas: {}".format(
                alpha, list(models.keys())))
        key = alpha
    else:
        key = random.choice(list(models.keys()))

    i = random.randrange(len(models[key]))
    model = models[key][i]

    print("Model {} is selected".format(filenames[key][i]))

    return model, key

# -----------------------------------------
# or load one specific model by its file name
# -----------------------------------------

def get_selected_model(model_name, MODELS_DIR):
    """Load one specific DBVAE checkpoint by file name.

    Args:
        model_name: checkpoint file name, e.g. ``model_alpha_0.01_v2.pth.tar``.
            The alpha is read from its third underscore-separated part.
        MODELS_DIR: directory containing the file; works with or without a
            trailing path separator.

    Returns:
        (model, alpha): the model in eval mode on the global ``device`` and the
        alpha string parsed from the file name.
    """
    alpha = model_name.split("_")[2]

    # Rebuild the network and restore the saved weights.
    model = DBVAE(z_dim=100).to(device)
    checkpoint = torch.load(os.path.join(MODELS_DIR, model_name), map_location=device)
    model.load_state_dict(checkpoint)
    model.eval()

    print("Model {} is selected".format(model_name))

    return model, alpha

In [None]:
# Alternative: draw a random already-loaded model for a given alpha, e.g.
# model_sum, alpha_sum = get_random_model(models_sum, filenames_sum, alpha="0.001")
model_sum, alpha_sum= get_selected_model("model_alpha_0.01_v2.pth.tar", MODELS_DIR_SUM)

model_mean, alpha_mean = get_selected_model("model_alpha_0.01_v1.pth.tar", MODELS_DIR_MEAN)

# Test a model

In [None]:
def get_test_accuracies(model):
    """Evaluate one model on the four subgroups and print the accuracies.

    NOTE: a later cell of this notebook defines another function that is also
    called ``get_test_accuracies`` (it reads saved CSV files). Once that cell
    has run, it shadows this definition; re-run this cell before evaluating
    models again.

    Args:
        model: the model handed to ``ppb_utils.PPBFaceEvaluator.evaluate``.

    Returns:
        A list with five entries: for each of the four subgroups the
        ``accuracy`` object exactly as returned by the evaluator (an indexable
        whose element 0 is the accuracy of ``model``), followed by the overall
        accuracy as a plain number, i.e. the per-group accuracies weighted by
        the number of faces in each group.
    """
    keys = ["male_lighter", "male_darker", "female_lighter", "female_darker"]

    face_evaluator = ppb_utils.PPBFaceEvaluator()

    accuracies = []
    weighted_accuracy_sum = 0
    total_num_faces = 0

    for key in keys:
        accuracy, num_faces = face_evaluator.evaluate([model], key, patch_stride=0.2, patch_depth=5)
        print("Test accuracy for {}: {} \n".format(key, round(accuracy[0], 4)))

        # Weight the unrounded accuracy; rounding is for display only, so it
        # must not leak into the aggregated overall score.
        weighted_accuracy_sum += num_faces * accuracy[0]
        total_num_faces += num_faces
        accuracies.append(accuracy)

    total_test_accuracy = weighted_accuracy_sum / total_num_faces
    accuracies.append(total_test_accuracy)
    print("Overall accuracy: {}".format(total_test_accuracy))

    return accuracies

In [None]:
# accuracies = get_test_accuracies(model_sum)

# Optionally test each model
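Each model was already evaluated after training, and those results are stored in final_results/sum/test_accuracy/ and final_results/mean/test_accuracy/, so re-running the evaluation below is optional. Results produced here are written to final_results/new_test_accuracy/ instead.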

In [None]:
def get_accuracies_all_models(models, filenames, reduction_type):
    """Evaluate every loaded model and write one CSV of accuracies per model.

    Each model is passed to ``get_test_accuracies(model)`` (the model-evaluating
    function of that name; a later cell redefines the name as a CSV loader, so
    the evaluating version must be the one currently defined). The result is
    written to ``final_results/new_test_accuracy/<reduction_type>/<name>.csv``,
    relative to the current working directory, one ``group, value`` line per
    subgroup plus ``total``.

    Args:
        models: dict mapping alpha key -> list of models.
        filenames: dict with the same keys, mapping to the model names in the
            same order; the names become the CSV file names.
        reduction_type: sub-folder name for the output (the notebook uses
            "sum" and "mean").
    """
    groups = ["male_lighter", "male_darker", "female_lighter", "female_darker", "total"]

    out_dir = Path("final_results/new_test_accuracy/{}/".format(reduction_type))
    out_dir.mkdir(parents=True, exist_ok=True)

    for key in models.keys():

        for i, model in enumerate(models[key]):

            filename = filenames[key][i]
            print("Model {}".format(filename))
            accuracies = get_test_accuracies(model)

            # The context manager closes the file even if a write fails.
            with open(out_dir / "{}.csv".format(filename), "w") as f:
                for group, acc in zip(groups, accuracies):
                    f.write(group + ", " + str(acc) + "\n")
            

In [None]:
#get_accuracies_all_models(models_sum, filenames_sum, "sum")

In [None]:
#get_accuracies_all_models(models_mean, filenames_mean, "mean")

# Calculating image probabilities

In [None]:
def get_probabilities(model, TRAIN_DATA_DIR, alpha, device):
    """Compute a resampling probability for every training sample.

    Loads the datasets from ``TRAIN_DATA_DIR``, runs the training split through
    ``model.encoder`` via ``debias_utils.get_all_latent_means`` and turns the
    resulting latent means into per-sample probabilities with
    ``debias_utils.get_training_sample_probabilities``.

    Args:
        model: model whose ``encoder`` attribute is used.
        TRAIN_DATA_DIR: directory passed to ``data_utils.load_datasets``.
        alpha: debiasing strength; converted with ``float`` before use, so the
            string parsed from a file name is accepted.
        device: device forwarded to ``get_all_latent_means``.

    Returns:
        (train_data, sample_probabilities)
    """
    # Only the training split is needed; the validation split is discarded.
    train_data, _ = data_utils.load_datasets(TRAIN_DATA_DIR)

    # No sampler is given, so the loader walks the dataset in order.
    ordered_loader = torch_data.DataLoader(train_data, batch_size=24)

    # The literal 100 is passed through unchanged; presumably the latent size
    # matching DBVAE(z_dim=100) -- TODO confirm against debias_utils.
    latent_means = debias_utils.get_all_latent_means(ordered_loader, model.encoder, 100, device)

    return train_data, debias_utils.get_training_sample_probabilities(
        latent_means,
        train_data.labels,
        bins=10,
        alpha=float(alpha),
    )

In [None]:
# Resampling probabilities are computed once, with the selected "sum" model;
# the sampling, ranking and histogram cells below all reuse these two globals.
train_data, sample_probabilities = get_probabilities(model_sum, TRAIN_DATA_DIR, alpha_sum, device)

# Batch sampling with and without debiasing

In [None]:
def print_faces(image1, image2, title1, title2):
    """Show two image tensors side by side, each under its own title.

    Both images are permuted with ``(1, 2, 0)`` before plotting, i.e. their
    first axis is moved last for ``imshow``. Grid lines are switched off
    through ``plt.rcParams``, which affects later plots as well.
    """
    plt.rcParams["axes.grid"] = False

    plt.figure(figsize=(10, 4))

    # Left panel first, then right panel; ticks are removed on both.
    panels = ((121, title1, image1), (122, title2, image2))
    for position, title, image in panels:
        plt.subplot(position)
        plt.title(title)
        plt.xticks([])
        plt.yticks([])
        plt.imshow(image.permute((1, 2, 0)))

    plt.show()

In [None]:
def _face_grid(batch, max_faces=12, nrow=6):
    """Build an image grid from the samples of a batch whose label is non-zero."""
    # Move the last axis to position 1 before handing the images to make_grid.
    images = batch[0].permute((0, 3, 1, 2))

    # Flatten before and after nonzero() so the index is always 1-D. A plain
    # .squeeze() collapses to a 0-d index when exactly one label is non-zero,
    # which drops the batch axis and breaks the slicing below.
    face_idx = batch[1].reshape(-1).nonzero().reshape(-1)
    faces = images[face_idx]

    # [2, 1, 0] reverses the channel order (presumably BGR -> RGB; TODO confirm).
    return make_grid(faces[:max_faces, [2, 1, 0], :, :], nrow=nrow)


def sample_faces(sample_probabilities, train_data):
    """Compare a uniformly sampled batch with a probability-weighted batch.

    One batch of 50 samples is drawn with a ``RandomSampler`` and one with a
    ``WeightedRandomSampler`` driven by ``sample_probabilities``. From each
    batch, up to 12 samples with a non-zero label are shown in a grid.

    Args:
        sample_probabilities: one sampling weight per training sample.
        train_data: dataset yielding ``(image, label, ...)`` items.
    """
    uniform_loader = torch_data.DataLoader(train_data,
                                           batch_size=50,
                                           sampler=torch_data.RandomSampler(train_data))

    weighted_sampler = torch_data.WeightedRandomSampler(sample_probabilities,
                                                        len(sample_probabilities))
    weighted_loader = torch_data.DataLoader(train_data,
                                            batch_size=50,
                                            sampler=weighted_sampler)

    seq_images = _face_grid(next(iter(uniform_loader)))
    prob_images = _face_grid(next(iter(weighted_loader)))

    print_faces(seq_images, prob_images,
                "Random batch sampling", "Batch sampling with learned debiasing")

In [None]:
sample_faces(sample_probabilities, train_data)

# Faces with highest and lowest probabilities

In [None]:
def probability_faces(sample_probabilities, images):
    """Show the 12 images with the highest and the 12 with the lowest probability.

    Args:
        sample_probabilities: one value per image; ranked with ``argsort``.
        images: indexable image collection aligned with ``sample_probabilities``.
    """
    # Ascending order: the lowest probabilities come first, the highest last.
    ranked = sample_probabilities.argsort()

    def grid_for(indices):
        # Last axis moved to position 1, channel order reversed, 6 images per row.
        picked = images[indices].permute((0, 3, 1, 2))
        return make_grid(picked[:, [2, 1, 0], :, :], nrow=6)

    highest = grid_for(ranked[-12:])
    lowest = grid_for(ranked[:12])

    print_faces(highest, lowest,
                "Faces with the highest sampling probability.",
                "Faces with the lowest sampling probability.")

In [None]:
probability_faces(sample_probabilities, train_data.images)

# Image Reconstructions

In [None]:
def get_reconstruction(model):
    """Plot 16 randomly chosen training faces next to their reconstructions.

    Reads the global ``train_data``: samples with a non-zero label are used as
    the pool of faces, and 16 of them are drawn with replacement.

    Args:
        model: module whose forward pass returns
            ``(mean, logvar, y_pred, reconstruction, z)``.
    """
    n_faces = 16

    face_pool = train_data.images[train_data.labels.squeeze().nonzero()]
    inds = random.choices(range(face_pool.shape[0]), k=n_faces)

    # Convert only the selected faces to float instead of the whole pool.
    faces = face_pool[inds].permute(0, 3, 1, 2).float() / 255.

    # The model may live on the GPU while the dataset tensors are on the CPU:
    # run the forward pass on the model's device and bring the result back for
    # plotting. Gradients are not needed for visualisation.
    model_device = next(model.parameters()).device
    with torch.no_grad():
        _, _, _, reconstruction, _ = model(faces.to(model_device))
    reconstruction = reconstruction.detach().cpu()

    nrow = int(math.sqrt(n_faces))
    faces = make_grid(faces[:, [2, 1, 0], :, :], nrow=nrow)
    reconstruction = make_grid(reconstruction[:, [2, 1, 0], :, :], nrow=nrow)

    print_faces(faces, reconstruction,
                "Original images",
                "Reconstructions")

In [None]:
get_reconstruction(model_sum)

In [None]:
get_reconstruction(model_mean)

# Interpolate between images

In [None]:
def element_interpolate(a, b, i):
    """Linearly interpolate from ``a`` (at ``i == 0``) to ``b`` (at ``i == 1``)."""
    return a + (b - a) * i

def interpolate(model, n_pairs=5, n_steps=6):
    """Plot latent-space interpolations between random pairs of training faces.

    Reads the global ``train_data``; samples with a non-zero label form the
    pool of faces. Each row of the figure shows: the first original face,
    ``n_steps`` images decoded from points between the two latent means
    (both end points included), and the second original face.

    Args:
        model: module whose forward pass returns
            ``(mean, logvar, y_pred, reconstruction, z)`` and which exposes a
            ``decoder``.
        n_pairs: number of face pairs, i.e. rows in the figure (default 5).
        n_steps: number of interpolation points per pair (default 6).
    """
    plt.rcParams["axes.grid"] = False

    face_pool = train_data.images[train_data.labels.squeeze().nonzero()]

    # The model may be on the GPU while the dataset tensors are on the CPU.
    model_device = next(model.parameters()).device

    # Column vector of weights so one broadcasted call interpolates all steps.
    weights = torch.linspace(0, 1, steps=n_steps).unsqueeze(1)

    rows = []
    with torch.no_grad():  # visualisation only, no gradients needed
        for _ in range(n_pairs):
            inds = random.choices(range(face_pool.shape[0]), k=2)
            two_faces = face_pool[inds].permute(0, 3, 1, 2).float() / 255.

            mean, _, _, _, _ = model(two_faces.to(model_device))

            # One latent vector per weight, from mean[0] through to mean[1].
            all_z = element_interpolate(mean[0].unsqueeze(0),
                                        mean[1].unsqueeze(0),
                                        weights.to(mean))

            decoded = model.decoder(all_z).detach().cpu()
            row = torch.cat((two_faces[0].unsqueeze(0), decoded, two_faces[1].unsqueeze(0)))
            rows.append(row[:, [2, 1, 0], :, :])

    # Two originals plus n_steps decoded images per row.
    interpolated = make_grid(torch.cat(rows), nrow=n_steps + 2)
    plt.imshow(interpolated.permute((1, 2, 0)))
    plt.xticks([]); plt.yticks([])
    plt.show()

In [None]:
interpolate(model_sum)

In [None]:
interpolate(model_mean)

# Test accuracies for different alphas

In [None]:
def _parse_accuracy(text):
    """Parse an accuracy cell such as ``"[0.93]"`` or ``"0.93"``; None if not numeric."""
    text = text.strip()
    # Try the value as written first, then with one wrapping character removed
    # on each side (the saved files store per-group values as "[0.93]").
    for candidate in (text, text[1:-1]):
        try:
            return float(candidate)
        except ValueError:
            continue
    return None


def get_test_accuracies(TEST_ACC_DIR):
    """Read the saved test-accuracy CSV files, grouped by alpha.

    NOTE: this reuses the name of the earlier ``get_test_accuracies(model)``
    cell and shadows it once this cell has run.

    The alpha is taken from the third underscore-separated part of each file
    name. In every row the second column is parsed as the accuracy; rows
    without a numeric second column (e.g. a header) are skipped. Entries that
    are not files, or whose name has fewer than three underscore-separated
    parts, are ignored.

    Args:
        TEST_ACC_DIR: directory with the CSV files; works with or without a
            trailing path separator.

    Returns:
        dict mapping alpha -> list with one list of accuracies per file.
    """
    accs = {}

    # Sorted so that the result does not depend on the listing order.
    for filename in sorted(os.listdir(TEST_ACC_DIR)):
        path = os.path.join(TEST_ACC_DIR, filename)
        name_parts = filename.split("_")
        if len(name_parts) < 3 or not os.path.isfile(path):
            continue
        alpha = name_parts[2]

        accuracies = []
        with open(path, mode='r', newline='') as infile:
            for row in csv.reader(infile):
                if len(row) < 2:
                    continue
                value = _parse_accuracy(row[1])
                if value is not None:
                    accuracies.append(value)

        accs.setdefault(alpha, []).append(accuracies)

    return accs

def get_mean_var(list_dict):
    """Average the runs stored under each key of ``list_dict``.

    Args:
        list_dict: dict mapping a key (alpha) to a list of equally long
            numeric lists, one per run.

    Returns:
        (alphas, mean, var): the keys in dict order, plus for each key the
        element-wise mean and variance across its runs (``axis=0``).
    """
    alphas = list(list_dict.keys())
    stacked_runs = [np.array(list_dict[alpha]) for alpha in alphas]

    mean = [runs.mean(axis=0) for runs in stacked_runs]
    var = [runs.var(axis=0) for runs in stacked_runs]

    return alphas, mean, var

In [None]:
def create_accuracy_graph(alphas, mean, var):
    """Draw a grouped bar chart of the accuracies per subgroup for each alpha.

    Args:
        alphas: list of alpha keys, aligned with ``mean`` and ``var``.
        mean: per alpha, the mean accuracies (one value per subgroup label).
        var: per alpha, the matching variances. They are used directly as the
            error-bar length, so the bars show the variance, not the standard
            deviation.

    Only the alphas listed in ``alpha_sequence`` are drawn, in that order;
    entries of the sequence that are absent from ``alphas`` are skipped.
    """
    plt.rcParams["axes.grid"] = True

    labels = np.array(['Male Light', 'Male Dark', 'Female Light', 'Female Dark', 'Overall'])

    # Display order of the series.
    alpha_sequence = ["nodebias",  "0.1", "0.05", "0.01", "0.001"]
    shown = [a for a in alpha_sequence if a in alphas]

    x = np.arange(len(mean[0]))  # the label locations
    width = 0.14  # the width of the bars

    fig, ax = plt.subplots(figsize=(10, 5))

    # Centre the group of bars on its tick whatever the number of series;
    # a fixed offset only centres one particular series count.
    centre = (len(shown) - 1) / 2
    for i, a in enumerate(shown):
        idx = alphas.index(a)
        positions = x + width * 1.04 * (i - centre)

        ax.bar(positions, mean[idx], width, label="\u03B1 " + alphas[idx])
        ax.errorbar(positions, mean[idx], var[idx], linestyle='None', ecolor='#666666')

    # Add some text for labels, title and custom x-axis tick labels, etc.
    ax.set_ylabel('Accuracy')
    ax.set_title('Accuracy by skin and gender over different \u03B1')
    ax.set_xticks(x)
    ax.set_xticklabels(labels)

    legend = ax.legend(loc='lower left', shadow=True, frameon=True)

    # Put a nicer background color on the legend.
    frame = legend.get_frame()
    frame.set_facecolor('#ffffff')

    ax.set_ylim([0.7, 1.0])

    plt.show()

In [None]:
accuracies_sum = get_test_accuracies(TEST_ACC_DIR_SUM)

alpha_lst, test_mean_lst, test_var_lst = get_mean_var(accuracies_sum)

create_accuracy_graph(alpha_lst, test_mean_lst, test_var_lst)

In [None]:
accuracies_mean = get_test_accuracies(TEST_ACC_DIR_MEAN)

alpha_lst, test_mean_lst, test_var_lst = get_mean_var(accuracies_mean)

create_accuracy_graph(alpha_lst, test_mean_lst, test_var_lst)

# Recall and Subgroup variance

In [None]:
# Recomputed for the "sum" results: the previous cell overwrote alpha_lst,
# test_mean_lst and test_var_lst with the "mean" results.
accuracies_sum = get_test_accuracies(TEST_ACC_DIR_SUM)

alpha_lst, test_mean_lst, test_var_lst = get_mean_var(accuracies_sum)

def accuracy_mean_and_variance(alphas, accuracies):
    """Print and return overall accuracy and subgroup variance per alpha.

    Rows whose alpha is ``"basic"`` are left out. Accuracies are scaled to
    percentages first; in every row the last column is taken as the overall
    value and the remaining columns as the subgroup values.

    Args:
        alphas: sequence of alpha keys, one per row of ``accuracies``.
        accuracies: 2-D sequence of accuracies as fractions (one row per alpha).

    Returns:
        (overall, variance): two 1-D arrays with one entry per kept alpha,
        both rounded to 2 decimals.
    """
    alphas = np.array(alphas, dtype=str)

    # A boolean mask keeps every array at least 1-D; argwhere(...).squeeze()
    # collapses to a scalar index when exactly one alpha remains.
    keep = alphas != "basic"

    alphas = alphas[keep]
    accuracies = np.array(accuracies)[keep] * 100

    overall = accuracies[:, -1].round(2)
    variance = np.var(accuracies[:, :-1], axis=1).round(2)

    print("Alpha:      \t Recall:      \t Variance:")
    for i, alpha in enumerate(alphas):
        print("{}      \t {}      \t {}".format(alpha, overall[i], variance[i]))

    return overall, variance

recall, variance = accuracy_mean_and_variance(alpha_lst, test_mean_lst)

# Training loss and validation accuracy during training

In [None]:
def get_loss_acc_csv(TRAIN_STATS_DIR):
    """Read the per-run training statistics CSV files, grouped by alpha.

    The alpha is taken from the third underscore-separated part of each file
    name. The first row of every file is skipped as a header. From each other
    row, column 0 is read as the training loss and column 3 as the validation
    accuracy. Entries that are not files, or whose name has fewer than three
    underscore-separated parts, are ignored, as are empty rows.

    Args:
        TRAIN_STATS_DIR: directory with the CSV files; works with or without a
            trailing path separator.

    Returns:
        (train_loss, val_acc): two dicts mapping alpha -> list with one list
        of values per file.
    """
    train_loss = {}
    val_acc = {}

    # Sorted so that the result does not depend on the listing order.
    for filename in sorted(os.listdir(TRAIN_STATS_DIR)):
        path = os.path.join(TRAIN_STATS_DIR, filename)
        name_parts = filename.split("_")
        if len(name_parts) < 3 or not os.path.isfile(path):
            continue
        alpha = name_parts[2]

        loss = []
        acc = []
        with open(path, mode='r', newline='') as infile:
            reader = csv.reader(infile)
            next(reader, None)  # skip the header row
            for row in reader:
                if not row:
                    continue
                # The first character of each field is dropped before parsing;
                # the saved files apparently prefix every value with one
                # character -- TODO confirm against the code that writes them.
                loss.append(float(row[0][1:]))
                acc.append(float(row[3][1:]))

        train_loss.setdefault(alpha, []).append(loss)
        val_acc.setdefault(alpha, []).append(acc)

    return train_loss, val_acc

In [None]:
def create_train_loss_val_acc(alpha_lst, train_loss_mean, val_acc_mean):
    """Plot the training-loss and validation-accuracy curves side by side.

    Args:
        alpha_lst: alpha keys; entry ``i`` labels curve ``i`` in both panels.
        train_loss_mean: one sequence of loss values per alpha.
        val_acc_mean: one sequence of accuracy values per alpha.
    """
    plt.figure(figsize=(15, 5))

    # Left panel: loss. Right panel: accuracy. One line per alpha in each.
    panels = (
        (121, "Average training Loss", train_loss_mean),
        (122, "Average validation accuracy", val_acc_mean),
    )
    for position, title, curves in panels:
        plt.subplot(position)
        plt.title(title)
        for i, curve in enumerate(curves):
            steps = list(range(len(curve)))
            plt.plot(steps, curve, label="\u03B1 " + alpha_lst[i])
        plt.legend()

    plt.show()

In [None]:
# train_loss, val_acc = get_loss_acc_csv(TRAIN_STATS_DIR_MEAN)
# alpha_lst, train_loss_mean, train_loss_var = get_mean_var(train_loss)
# alpha_lst, val_acc_mean, val_acc_var = get_mean_var(val_acc)

# create_train_loss_val_acc(alpha_lst, train_loss_mean, val_acc_mean)

In [None]:
# train_loss, val_acc = get_loss_acc_csv(TRAIN_STATS_DIR_SUM)
# alpha_lst, train_loss_sum, train_loss_var = get_mean_var(train_loss)
# alpha_lst, val_acc_sum, val_acc_var = get_mean_var(val_acc)

# create_train_loss_val_acc(alpha_lst, train_loss_sum, val_acc_sum)

# Plot sample probabilities

In [None]:
def plot_probabilities_histogram(sample_probabilities, labels):
    """Plot a log-scale histogram of the probabilities of samples labelled 1.

    Args:
        sample_probabilities: array with one probability per sample.
        labels: array of labels aligned with ``sample_probabilities``; it is
            squeezed before being compared with 1.
    """
    # Bin only the samples whose label equals 1 into 10 equal-width bins.
    counts, bin_edges = np.histogram(sample_probabilities[labels.squeeze() == 1], bins=10)

    # The 'seaborn' style was renamed to 'seaborn-v0_8' in newer matplotlib
    # releases; use whichever name this installation provides.
    style_name = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
    plt.style.use(style_name)
    plt.rcParams["figure.figsize"] = [10, 5]

    # Replay the precomputed counts: one entry per bin, weighted by its count.
    plt.hist(bin_edges[:-1], bin_edges, weights=counts, rwidth=0.8, log=True)
    plt.ylabel('Number of faces')
    plt.xlabel('Probability of resampling')
    plt.show()

In [None]:
plot_probabilities_histogram(sample_probabilities, train_data.labels)