Important blocks for users:
*   Under the "Setup" heading, the second code block moves all the dataset files to the location AIF360 expects. Users need to upload the datasets to their own Google Drive first and then change the Drive directory used in that block. Download the datasets here: https://github.com/Trusted-AI/AIF360/blob/master/aif360/data/README.md
*   Under the "Sorting the Data" heading, the first code block includes a line that lets you choose which dataset to run the model on.





#Setup

In [None]:
# import packages and mount google drive
# NOTE: google.colab is only available inside a Colab runtime.

from google.colab import drive
drive.mount('/content/drive')

# Install AIF360 into the current runtime (no version is pinned here).
!pip install aif360
# Clone the AIF360 repository into the working directory. When the directory
# already exists, git prints a "destination path ... already exists" error
# (visible in the recorded output of this cell).
!git clone https://github.com/Trusted-AI/AIF360

import sys
import aif360
import matplotlib
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import random
import math


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
fatal: destination path 'AIF360' already exists and is not an empty directory.


In [None]:
# Copying the raw dataset files to the location AIF360 reads them from.
# IMPORTANT: if this is a new user, upload the dataset files to your Drive and
# change DRIVE_DATA_DIR below ("simons" is the folder the files were stored in).
#
# The destination is derived from the installed aif360 package instead of a
# hard-coded python3.7 path, so the cell keeps working when the runtime's Python
# version changes. Files are copied (not moved) and existing files are skipped,
# which makes the cell safe to re-run.
import os
import shutil

DRIVE_DATA_DIR = "/content/drive/MyDrive/simons"
AIF360_RAW_DIR = os.path.join(os.path.dirname(aif360.__file__), "data", "raw")

# destination sub-folder -> files that belong in it
DATASET_FILES = {
    "german": ["german.data", "german.doc"],
    "bank": ["bank-additional-full.csv", "bank-additional.csv", "bank-additional-names.txt"],
    "compas": ["compas-scores-two-years.csv"],
    "adult": ["adult.names", "adult.data", "adult.test"],
}

for dataset_dir, file_names in DATASET_FILES.items():
    target_dir = os.path.join(AIF360_RAW_DIR, dataset_dir)
    os.makedirs(target_dir, exist_ok=True)

    for file_name in file_names:
        source_path = os.path.join(DRIVE_DATA_DIR, file_name)
        target_path = os.path.join(target_dir, file_name)

        if os.path.exists(target_path):
            # Already in place (e.g. from an earlier run of this cell).
            continue
        if not os.path.exists(source_path):
            print(f"Skipping {file_name}: not found in {DRIVE_DATA_DIR}")
            continue

        shutil.copy(source_path, target_path)

In [None]:
# more imports

# if this block glitches, just run it again and it should work
from aif360.datasets import AdultDataset, BankDataset, CompasDataset, GermanDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.sklearn import metrics

from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error, make_scorer

from numpy import mean

from matplotlib import pyplot as plt
import plotly.express as px

from torch.autograd import Variable

from statistics import mean
from statistics import stdev
# randomness: seed every random number generator used in this notebook and
# ask torch / cuDNN for deterministic behaviour
seed = 3
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    seed_rng(seed)
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
## pick the first CUDA device when one is available, otherwise fall back to the CPU

device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cpu


# Basic Functions

In [None]:
def per_chg(a,b):
    """Return how much smaller ``a`` is than ``b``, as a percentage of ``b``.

    Computes ``(b - a) / b * 100``; the result is positive when ``a < b``
    (for positive ``b``) and negative when ``a > b``.

    Args:
        a: the newer value.
        b: the reference value the change is measured against.

    Returns:
        The percentage change, or ``0`` when ``b`` is zero.
    """
    try:
        return (b - a) / b * 100
    except ZeroDivisionError:
        # Only the division by a zero reference is expected here; any other
        # error (e.g. a wrong argument type) is a real bug and must surface.
        return 0

In [None]:
def get_dataset_options(dataset_name):
    """Build the AIF360 dataset selected by ``dataset_name``.

    Args:
        dataset_name: one of ``"adult"``, ``"compas"``, ``"bank"`` or ``"german"``.

    Returns:
        A 4-tuple ``(dataset, pro_attr, privileged_groups, unprivileged_groups)``
        where ``pro_attr`` is the name of the protected attribute and the two
        group entries are single-element lists of ``{pro_attr: value}`` dicts
        (value 1 for the privileged group, 0 for the unprivileged group).

    Raises:
        ValueError: if ``dataset_name`` is not one of the supported names.
    """
    if dataset_name=="adult":
        pro_attr = 'sex'
        return (AdultDataset(), pro_attr, [{'sex': 1}], [{'sex': 0}])

    elif dataset_name=="compas":
        pro_attr = 'race'
        return (CompasDataset(), pro_attr, [{'race': 1}], [{'race': 0}])

    elif dataset_name=="bank":
        pro_attr = 'age'
        return (BankDataset(protected_attribute_names=['age'],
            privileged_classes=[lambda x: x >= 25],
            features_to_drop=['day_of_week']), pro_attr, [{'age': 1}], [{'age': 0}])

    elif dataset_name=="german":
        pro_attr = 'age'
        label_map = {1.0: 'Good Credit', 0.0: 'Bad Credit'}
        g = GermanDataset(metadata={'label_maps': [label_map]})
        # Re-encode the labels as 2 - label, so a raw label of 1 becomes 1.0
        # (favorable) and a raw label of 2 becomes 0.0 (unfavorable).
        g.labels = (2.0 - g.labels).astype('float64')
        g.favorable_label = 1.0
        g.unfavorable_label = 0.0
        return (g, pro_attr, [{'age': 1}], [{'age': 0}])

    # Without this, an unknown name returned None and the caller failed later
    # with an unrelated "cannot unpack" error.
    raise ValueError(
        "Unknown dataset_name {!r}; expected one of 'adult', 'compas', 'bank', 'german'".format(dataset_name))

In [None]:
def get_statistics(arr, num):
  """Print the mean and standard deviation of each metric over the first ``num`` runs.

  Args:
    arr: a sequence of per-run metric lists; index 0..4 of each entry is read
      as br, fnr, fpr, accuracy and f1 respectively.
    num: how many entries of ``arr`` to include (taken from the start).
  """
  metric_names = ["br", "fnr", "fpr", "accuracy", "f1"]

  for column, label in enumerate(metric_names):
    # Gather this metric's value from every run.
    values = [arr[run][column] for run in range(num)]
    print(label, ":",  mean(values), ", stdev:" , stdev(values))

# Loss Functions

In [None]:
# this is a special function that is part of our novel loss function

def scal(x, scale):
    """Piecewise penalty: ``x + 1`` for ``x <= 0``, ``exp(scale * x)`` otherwise.

    Both pieces equal 1 at ``x == 0``. The exponential branch goes through
    ``torch.exp``, so ``x`` is expected to be a tensor there.
    """
    return x + 1 if x <= 0 else torch.exp(scale * x)

In [None]:
# this is the baseline fairness-considerate loss function

def loss_fn(y_pred, y, pro_attr, alpha):
    """Baseline fairness loss: BCE plus a penalty on the gap in mean prediction.

    Args:
        y_pred: predicted probabilities.
        y: target labels, same shape as ``y_pred``.
        pro_attr: protected-attribute value per sample; entries equal to 1 and
            0 select the two groups that are compared.
        alpha: weight of the disparity penalty.

    Returns:
        ``BCE(y_pred, y) + alpha * |mean(y_pred | group 1) - mean(y_pred | group 0)|``
        as a scalar tensor.
    """
    bce = nn.BCELoss()

    # Only the predictions are needed per group; the labels were previously
    # sliced as well but never used.
    mean_pred_1 = y_pred[pro_attr == 1].mean()
    mean_pred_0 = y_pred[pro_attr == 0].mean()

    return bce(y_pred, y) + alpha * torch.abs(mean_pred_1 - mean_pred_0)

In [None]:
# this is the loss function with our novel contribution

def loss_fn_new(y_pred, y, pro_attr, scale, base0, base1, alpha, beta):
    """Novel fairness loss: BCE + disparity penalty + base-rate penalty.

    Args:
        y_pred: predicted probabilities.
        y: target labels, same shape as ``y_pred``.
        pro_attr: protected-attribute value per sample; entries equal to 1 and
            0 select the two groups.
        scale: scale factor forwarded to ``scal``.
        base0: reference base rate for group 0.
        base1: reference base rate for group 1.
        alpha: weight of the disparity term ``|br0 - br1|``.
        beta: weight of the base-rate term.

    Returns:
        ``BCE(y_pred, y) + alpha * |br0 - br1|
        + beta * (scal(base1 - br1, scale) + scal(base0 - br0, scale))``
        where ``br0`` / ``br1`` are the mean predictions of the two groups.
    """
    bce = nn.BCELoss()

    # Only the predictions are needed per group; the labels were previously
    # sliced as well but never used.
    br1 = y_pred[pro_attr == 1].mean()
    br0 = y_pred[pro_attr == 0].mean()

    disparity_term = alpha * torch.abs(br0 - br1)
    base_rate_term = beta * (scal(base1 - br1, scale) + scal(base0 - br0, scale))

    return bce(y_pred, y) + disparity_term + base_rate_term

# Building the Logistic Regression Model

In [None]:
class LogisticRegression(nn.Module):
    """Logistic regression: one linear layer with a single output, then a sigmoid."""

    def __init__(self, input_features):
        """Create the linear layer on the module-level ``device``.

        The layer's weights are drawn uniformly from [0, 1).
        """
        super().__init__()
        layer = nn.Linear(in_features=input_features, out_features=1)
        self.linear = layer.to(device)
        nn.init.uniform_(self.linear.weight, a=0.0, b=1.0)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [None]:
def init_weights(m):
  """Set every weight of ``m`` to 1.0 when ``m`` is exactly an ``nn.Linear``.

  Other module types (including subclasses of ``nn.Linear``) are left untouched.
  Intended to be passed to ``Module.apply``.
  """
  if type(m) != nn.Linear:
    return

  # In-place fill must not be recorded by autograd.
  with torch.no_grad():
    m.weight.fill_(1.0)
      # print(m.weight)

In [None]:
# menu = "bce" for cross-entropy loss
# menu = "baseline" for loss function designed for fairness
# menu = "novel" for our new loss function

# set scale_num to 10 by default

def train_model(menu, train_x, train_y, protected, alpha, beta, br0, br1, scale_num=10, max_epochs=500):
  """Re-initialise and train the module-level ``model`` with the chosen loss.

  Args:
    menu: ``"bce"`` for plain binary cross-entropy, ``"baseline"`` for
      ``loss_fn`` or ``"novel"`` for ``loss_fn_new``.
    train_x: training features.
    train_y: training labels.
    protected: protected-attribute value per training sample.
    alpha: weight of the disparity term (used by "baseline" and "novel").
    beta: weight of the base-rate term (used by "novel" only).
    br0: reference base rate for group 0 (used by "novel" only).
    br1: reference base rate for group 1 (used by "novel" only).
    scale_num: scale forwarded to ``loss_fn_new`` (used by "novel" only).
    max_epochs: upper bound on the number of training epochs.

  Returns:
    ``None`` after training; ``0`` if ``menu`` is not a known option (a message
    is printed and no training step is taken).

  Side effects:
    Resets the torch seed to 0, overwrites every parameter of ``model`` and
    then updates ``model`` in place. Prints progress messages.
  """
  torch.manual_seed(0)
  model.apply(init_weights)

  # Every parameter (weights and biases) is then replaced by uniform random
  # values, so each call starts from the same seeded initialisation.
  for param in model.parameters():
      param.data = torch.rand_like(param)

  print("Training start ...")

  criterion = nn.BCELoss()

  optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
  loss_arr = []

  # training loop
  for epoch in range(max_epochs):
      predicted = model(train_x)

      # loss function
      if menu == "bce":
        loss = criterion(predicted, train_y)
      elif menu == "baseline":
        # alpha weights the disparity penalty added to the BCE loss
        loss = loss_fn(predicted, train_y, protected, alpha)
      elif menu == "novel":
        # br0 and br1 are the reference base rates passed in by the caller
        # alpha weights the disparity penalty
        # beta weights the base-rate penalty (our contribution)
        loss = loss_fn_new(predicted, train_y, protected, scale_num, br0, br1, alpha, beta)
      else:
        print("Yikes! Looks like someone entered an invalid menu option. It's okay, we all make mistakes!")
        return 0

      loss.backward()

      # updates
      optimizer.step()
      optimizer.zero_grad()
      loss_arr.append(loss.item())

      # training stops when max number of epochs reached or when last two iterations have resulted in less than 0.1% improvement
      if len(loss_arr)>3 and 0<per_chg(loss_arr[-1],loss_arr[-2])<0.1 and 0<per_chg(loss_arr[-2],loss_arr[-3])<0.1:
          break
  else:
      # Reached only when the loop ran all max_epochs without the early-stopping
      # break. The previous check `epoch == max_epochs` could never be true
      # because range(max_epochs) ends at max_epochs - 1.
      print("Failed to converge!")

# Fairness and Accuracy Metrics

In [None]:
# this is a helper function

def get_metrics(pred, true):
  """Compute ``[br, fnr, fpr, accuracy, f1]`` for a set of predictions.

  Args:
    pred: predicted labels as a tensor (callers in this notebook pass rounded
      model outputs).
    true: true labels as a tensor with one entry per prediction.

  Returns:
    A list ``[br, fnr, fpr, accuracy, f1]`` where ``br`` is the mean of
    ``pred``, ``fnr`` / ``fpr`` come from ``confusion_matrix`` and
    ``accuracy`` / ``f1`` (macro-averaged) come from scikit-learn.
  """
  br = pred.mean().item()
  fnr = confusion_matrix(pred, true, "fnr", False).item()
  fpr = confusion_matrix(pred, true, "fpr", False).item()

  # Hand scikit-learn plain 1-D numpy arrays. Tensor.numpy() refuses tensors
  # that require grad or live on a GPU, so detach and move to the CPU
  # explicitly instead of relying on implicit coercion.
  true_np = true.detach().cpu().numpy().ravel()
  pred_np = pred.detach().cpu().numpy().ravel()

  accuracy = accuracy_score(true_np, pred_np)
  f1 = f1_score(true_np, pred_np, average='macro')

  return [br, fnr, fpr, accuracy, f1]

In [None]:
# returns the disparity

def get_disparity(pred, true, protected):
  """Return the absolute gap in mean prediction between the two groups.

  Args:
    pred: predictions as a tensor.
    true: true labels; accepted for signature compatibility but not used.
    protected: protected-attribute value per sample; entries equal to 1 and 0
      select the two groups.

  Returns:
    ``|mean(pred | protected == 1) - mean(pred | protected == 0)|`` as a
    0-dim tensor (callers call ``.item()`` on it or accumulate it).
  """
  rate_1 = pred[protected == 1].mean()
  rate_0 = pred[protected == 0].mean()

  return abs(rate_1 - rate_0)

In [None]:
# with returnChoice, you can choose whether to return the FPR or FNR
# enter "fpr" or "fnr"
# if you set print_rates = False, it won't print the FPR and FNR

def _rate_or_zero(numerator, denominator):
    """Return ``numerator / denominator`` as a tensor, or 0 where the denominator is 0."""
    rate = numerator / denominator
    return torch.where(denominator > 0, rate, torch.zeros_like(rate))


def confusion_matrix(prediction, truth, returnChoice, print_rates):
    """Return the false-positive or false-negative rate of ``prediction``.

    ``prediction`` is rounded first; positions are then counted as
    - prediction 1, truth 1 (True Positive)
    - prediction 1, truth 0 (False Positive)
    - prediction 0, truth 0 (True Negative)
    - prediction 0, truth 1 (False Negative)

    Args:
        prediction: predicted probabilities or labels as a tensor.
        truth: true 0/1 labels as a tensor of the same shape.
        returnChoice: ``"fpr"`` or ``"fnr"`` (case-insensitive).
        print_rates: when ``True``, print both rates.

    Returns:
        The chosen rate as a 0-dim tensor. A rate whose denominator is zero
        (no actual negatives for FPR, no actual positives for FNR) is 0.
        For any other ``returnChoice`` a message string is returned instead.
    """
    prediction = prediction.round()

    predicted_pos, predicted_neg = prediction == 1, prediction == 0
    actual_pos, actual_neg = truth == 1, truth == 0

    true_positives = (predicted_pos & actual_pos).sum()
    false_positives = (predicted_pos & actual_neg).sum()
    true_negatives = (predicted_neg & actual_neg).sum()
    false_negatives = (predicted_neg & actual_pos).sum()

    # Tensor division never raises ZeroDivisionError (0/0 gives nan), so the
    # zero-denominator case has to be handled explicitly.
    fpr = _rate_or_zero(false_positives, false_positives + true_negatives)
    fnr = _rate_or_zero(false_negatives, false_negatives + true_positives)

    if print_rates == True:
      print("FNR: ", fnr)
      print("FPR: ", fpr)

    returnChoice = returnChoice.casefold()

    if returnChoice == "fpr":
      ans = fpr
    elif returnChoice == "fnr":
      ans = fnr
    else:
      ans = "Oops! You entered an invalid option! It's okay, we all make mistakes!"

    return ans

# Tuning Hyperparameters

In [None]:
# using the BCE loss function to get an accuracy threshold

def bceTrain(X_train, y_train, pro_tr, X_val, y_val, pro_val):
  """Train the module-level ``model`` with plain BCE and derive thresholds.

  Args:
    X_train, y_train, pro_tr: training features, labels and protected attribute.
    X_val, y_val, pro_val: validation features, labels and protected attribute.

  Returns:
    ``(br0_threshold, br1_threshold, acc_threshold)`` where the first two are
    the mean rounded validation prediction for the groups with protected value
    0 and 1, and ``acc_threshold`` is the validation accuracy minus 0.10.
  """
  train_model("bce", X_train, y_train, pro_tr, 0, 0, 0, 0)

  # Inference only: no autograd graph is needed for the predictions.
  with torch.no_grad():
    bce_pred = model(X_val).round()

  # scikit-learn gets plain 1-D numpy arrays (explicit detach / CPU copy).
  acc_threshold = accuracy_score(
      y_val.detach().cpu().numpy().ravel(),
      bce_pred.cpu().numpy().ravel()) - 0.10

  br0_threshold = bce_pred[pro_val == 0].mean().item()
  br1_threshold = bce_pred[pro_val == 1].mean().item()

  return br0_threshold, br1_threshold, acc_threshold

In [None]:
# using the accuracy threshold to train the model with the baseline fairness loss function

def fairTrain(X_train, y_train, pro_tr, X_val, y_val, pro_val, acc_threshold):
  """Pick the disparity weight for the baseline fairness loss.

  Trains the module-level ``model`` with ``alpha`` in 0.1, 0.2, ..., 1.0 and
  keeps the alpha with the smallest validation disparity among those whose
  validation accuracy is at least ``acc_threshold``.

  Args:
    X_train, y_train, pro_tr: training features, labels and protected attribute.
    X_val, y_val, pro_val: validation features, labels and protected attribute.
    acc_threshold: minimum validation accuracy a candidate must reach.

  Returns:
    The selected alpha. If no candidate reaches ``acc_threshold`` a warning is
    printed and the alpha with the highest validation accuracy is returned
    (previously this case crashed with an UnboundLocalError).
  """
  y_val_np = y_val.detach().cpu().numpy().ravel()

  alpha_bl_w, min_disp = None, 100
  fallback_alpha, fallback_acc = None, -1.0

  for step in range(1, 11, 1):
    alpha = step/10
    train_model("baseline", X_train, y_train, pro_tr, alpha, 0, 0, 0)

    # Inference only: no autograd graph is needed for the predictions.
    with torch.no_grad():
      y_val_pred = model(X_val).round()

    accuracy = accuracy_score(y_val_np, y_val_pred.cpu().numpy().ravel())
    disparity = get_disparity(y_val_pred, y_val, pro_val).item()

    # Remember the most accurate candidate in case none meets the threshold.
    if accuracy > fallback_acc:
      fallback_alpha, fallback_acc = alpha, accuracy

    if accuracy >= acc_threshold and disparity < min_disp:
      alpha_bl_w, min_disp = alpha, disparity

  if alpha_bl_w is None:
    print("Warning: no baseline alpha reached the accuracy threshold; using the most accurate candidate instead.")
    return fallback_alpha

  return alpha_bl_w

In [None]:
# using the accuracy threshold to train the model with the novel loss function

def novelTrain(X_train, y_train, pro_tr, X_val, y_val, pro_val, acc_threshold, br0_threshold, br1_threshold):
  """Pick the (alpha, beta) weights for the novel loss function.

  Trains the module-level ``model`` for every alpha and beta in
  0.1, 0.2, ..., 1.0. A pair is admissible when its validation accuracy is at
  least ``acc_threshold`` and the mean rounded validation prediction of each
  group is at least that group's threshold. Among admissible pairs the one
  with the smallest validation disparity is kept.

  Args:
    X_train, y_train, pro_tr: training features, labels and protected attribute.
    X_val, y_val, pro_val: validation features, labels and protected attribute.
    acc_threshold: minimum validation accuracy.
    br0_threshold: minimum mean prediction for the group with protected value 0;
      also passed to the loss as that group's reference base rate.
    br1_threshold: same for the group with protected value 1.

  Returns:
    ``(alpha_w, beta_w)``. If no pair is admissible a warning is printed and
    the pair with the highest validation accuracy is returned (previously this
    case crashed with an UnboundLocalError).
  """
  y_val_np = y_val.detach().cpu().numpy().ravel()

  best_pair, min_disp_novel = None, 100
  fallback_pair, fallback_acc = None, -1.0

  # don't start at 0
  for alpha_step in range(1, 11, 1):
    for beta_step in range(1, 11, 1):
      alpha, beta = alpha_step/10, beta_step/10
      train_model("novel", X_train, y_train, pro_tr, alpha, beta, br0_threshold, br1_threshold)

      # Inference only: no autograd graph is needed for the predictions.
      with torch.no_grad():
        y_val_pred = model(X_val).round()

      br0 = y_val_pred[pro_val == 0].mean().item()
      br1 = y_val_pred[pro_val == 1].mean().item()

      accuracy = accuracy_score(y_val_np, y_val_pred.cpu().numpy().ravel())
      disparity = get_disparity(y_val_pred, y_val, pro_val).item()

      # Remember the most accurate pair in case none is admissible.
      if accuracy > fallback_acc:
        fallback_pair, fallback_acc = (alpha, beta), accuracy

      admissible = accuracy >= acc_threshold and (br1 - br1_threshold) >= 0 and (br0 - br0_threshold) >= 0
      if admissible and disparity < min_disp_novel:
        best_pair, min_disp_novel = (alpha, beta), disparity

  if best_pair is None:
    print("Warning: no (alpha, beta) pair met the accuracy and base-rate constraints; using the most accurate pair instead.")
    best_pair = fallback_pair

  alpha_w, beta_w = best_pair
  return alpha_w, beta_w

# Sorting the Data

In [None]:
# IMPORTANT: if you want to change what dataset we test on, change dataset_name
# Options:
## "compas"
## "adult"
## "bank"
## "german"

dataset_name = "german"
dataset, pro_attr, privileged_groups, unprivileged_groups = get_dataset_options(dataset_name)

# Read the group membership from the dataset's own protected_attributes array.
# It was previously read from convert_to_dataframe() *after* dataset.features
# had been standardized; if that dataframe is built from dataset.features, the
# column no longer holds 0/1 and `.astype(int)` would not give group labels.
# NOTE(review): assumes protected_attributes columns follow the order of
# protected_attribute_names - confirm against the AIF360 version in use.
pro_col = dataset.protected_attribute_names.index(pro_attr)
pro_att = torch.tensor(dataset.protected_attributes[:, pro_col].astype(int), dtype=torch.int).to(device)

# NOTE(review): the scaler is fitted on the full dataset before the
# cross-validation split, so test folds influence the scaling statistics.
scale_orig = StandardScaler()
dataset.features = scale_orig.fit_transform(dataset.features)

# splitting the data set into X and Y (the protected attribute is built above)
X_data = torch.from_numpy(dataset.features.astype(np.float32)).to(device)
y_data = torch.from_numpy(dataset.labels.astype(np.float32)).to(device)

print(X_data)
# outer split: (train + validation) vs. test; inner split: train vs. validation
kf = KFold(n_splits=5, shuffle=True, random_state=1111)
kf_training = KFold(n_splits=5, shuffle=True, random_state = 1111)

tensor([[-1.2365, -0.7451,  0.9185,  ...,  1.2146,  0.1960, -0.1960],
        [ 2.2482,  0.9498, -0.8702,  ..., -0.8233,  0.1960, -0.1960],
        [-0.7387, -0.4166, -0.8702,  ..., -0.8233,  0.1960, -0.1960],
        ...,
        [-0.7387, -0.8745,  0.9185,  ..., -0.8233,  0.1960, -0.1960],
        [ 1.9993, -0.5055,  0.9185,  ...,  1.2146,  0.1960, -0.1960],
        [ 1.9993,  0.4625,  0.0241,  ..., -0.8233,  0.1960, -0.1960]])


# Running the Model (With Validation)

In [None]:
# Accumulators for the results of every run: for each loss function one list
# of overall metrics, one list per protected group, and a running disparity sum.

num_run = 0

# BCE
bce_arr, bce_arr_0, bce_arr_1 = [], [], []
bce_disp = 0

# baseline fairness loss
fair_arr, fair_arr_0, fair_arr_1 = [], [], []
fair_disp = 0

# novel loss
novel_arr, novel_arr_0, novel_arr_1 = [], [], []
novel_disp = 0

In [None]:
# Nested cross-validation. The outer KFold holds out a test fold; the inner
# KFold splits the remainder into training and validation sets. Every inner
# split produces one run that is evaluated on the same outer test fold, so the
# number of runs is (outer splits) x (inner splits).
for train_val_index, test_index in kf.split(X_data):
  # splitting the data
  X_val_train, X_test = X_data[train_val_index], X_data[test_index]
  y_val_train, y_test = y_data[train_val_index], y_data[test_index]
  pro_val_train, pro_test = pro_att[train_val_index], pro_att[test_index]

  # in order for our method to work, we need a training, validation, and test set
  # this is why we are splitting the dataset twice
  for train_index, val_index in kf_training.split(X_val_train):
    # splitting the data
    X_train, X_val = X_val_train[train_index], X_val_train[val_index]
    y_train, y_val = y_val_train[train_index], y_val_train[val_index]
    pro_train, pro_val = pro_val_train[train_index], pro_val_train[val_index]

    # `model` is a module-level name: train_model, bceTrain, fairTrain and
    # novelTrain all read it as a global rather than taking it as an argument.
    n_samples, n_features = X_train.shape
    model = LogisticRegression(n_features).to(device)
    
    # getting all necessary weights and thresholds
    # (each helper retrains `model` in place on the training set and scores it
    # on the validation set)
    br0_threshold, br1_threshold, acc_threshold = bceTrain(X_train, y_train, pro_train, X_val, y_val, pro_val)

    alpha_bl_w = fairTrain(X_train, y_train, pro_train, X_val, y_val, pro_val, acc_threshold)
    alpha_w, beta_w = novelTrain(X_train, y_train, pro_train, X_val, y_val, pro_val, acc_threshold, br0_threshold, br1_threshold)
    
    # for the purposes of testing other parts of the code
    # alpha_bl_w = 1
    # alpha_w, beta_w = 1, 0.5

    # training the models using the weights we found
    # The BCE run reuses the model object left over from tuning; train_model
    # overwrites all of its parameters before training.
    train_model("bce", X_train, y_train, pro_train, 0, 0, 0, 0)
    bce_pred = model(X_test).round()

    model = LogisticRegression(n_features).to(device)
    train_model("baseline", X_train, y_train, pro_train, alpha_bl_w, 0, 0, 0)
    fairness_pred = model(X_test).round()

    model = LogisticRegression(n_features).to(device)
    train_model("novel", X_train, y_train, pro_train, alpha_w, beta_w, br0_threshold, br1_threshold)
    novel_pred = model(X_test).round()

    # keeping track of the number of runs so we can get an average
    # this way, we can change n_splits without worrying about anything else
    num_run += 1

    # metrics for BCE
    # (per group, overall, and the running sum of the disparity, which
    # get_disparity returns as a tensor)
    bce_arr_0.append(get_metrics(bce_pred[pro_test == 0], y_test[pro_test == 0]))
    bce_arr_1.append(get_metrics(bce_pred[pro_test == 1], y_test[pro_test == 1]))
    bce_arr.append(get_metrics(bce_pred, y_test))
    bce_disp += get_disparity(bce_pred, y_test, pro_test)

    # metrics for fairness loss function
    fair_arr_0.append(get_metrics(fairness_pred[pro_test == 0], y_test[pro_test == 0]))
    fair_arr_1.append(get_metrics(fairness_pred[pro_test == 1], y_test[pro_test == 1]))
    fair_arr.append(get_metrics(fairness_pred, y_test))
    fair_disp += get_disparity(fairness_pred, y_test, pro_test)

    # metrics for novel loss function
    novel_arr_0.append(get_metrics(novel_pred[pro_test == 0], y_test[pro_test == 0]))
    novel_arr_1.append(get_metrics(novel_pred[pro_test == 1], y_test[pro_test == 1]))
    novel_arr.append(get_metrics(novel_pred, y_test))
    novel_disp += get_disparity(novel_pred, y_test, pro_test)

Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training start ...
Training sta

# Getting Our Numbers

In [None]:
# Report the results obtained with the plain BCE loss: per-group and overall
# statistics over all runs, followed by the average disparity.

print("BCE numbers!!!!")

for section_title, section_metrics in (("Group 0:", bce_arr_0), ("Group 1:", bce_arr_1), ("Overall:", bce_arr)):
  print("--------------")
  print(section_title)
  get_statistics(section_metrics, num_run)

print("--------------")
print("Disparity:", (bce_disp/num_run).item())

BCE numbers!!!!
--------------
Group 0:
br : 0.5892685532569886 , stdev: 0.11784003788313588
fnr : 0.28763867676258087 , stdev: 0.100593402750014
fpr : 0.41941551476716993 , stdev: 0.14800052283992718
accuracy : 0.6611562392116248 , stdev: 0.05379189976834685
f1 : 0.6436940828361323 , stdev: 0.053477207086490765
--------------
Group 1:
br : 0.7998406720161438 , stdev: 0.02828676279135596
fnr : 0.10713656663894654 , stdev: 0.02021308086251836
fpr : 0.5439266633987426 , stdev: 0.09443131075772576
accuracy : 0.7724984553859289 , stdev: 0.03537110796861401
f1 : 0.6842333095238132 , stdev: 0.04848536902304381
--------------
Overall:
br : 0.7607999968528748 , stdev: 0.028492686580487668
fnr : 0.1343599683046341 , stdev: 0.02123191185577361
fpr : 0.5119324159622193 , stdev: 0.08654863897986623
accuracy : 0.7512 , stdev: 0.035097483290591286
f1 : 0.6838211070405246 , stdev: 0.04344076919074131
--------------
Disparity: 0.2108840048313141


In [None]:
# Report the results obtained with the baseline fairness loss: per-group and
# overall statistics over all runs, followed by the average disparity.

print("Fairness numbers!!!!")

for section_title, section_metrics in (("Group 0:", fair_arr_0), ("Group 1:", fair_arr_1), ("Overall:", fair_arr)):
  print("--------------")
  print(section_title)
  get_statistics(section_metrics, num_run)

print("--------------")
print("Disparity:", (fair_disp/num_run).item())

Fairness numbers!!!!
--------------
Group 0:
br : 0.7466696095466614 , stdev: 0.11593604252039642
fnr : 0.17197604939341546 , stdev: 0.09013886645216534
fpr : 0.6416500645875931 , stdev: 0.16915018717844715
accuracy : 0.6359195226438189 , stdev: 0.047288612850590744
f1 : 0.5772658082350466 , stdev: 0.07704659283785502
--------------
Group 1:
br : 0.7666326045989991 , stdev: 0.03506857396491271
fnr : 0.13539613038301468 , stdev: 0.027668138050866212
fpr : 0.49810361862182617 , stdev: 0.09682867729212925
accuracy : 0.7641183464985736 , stdev: 0.029858039583666438
f1 : 0.6871198845024364 , stdev: 0.03978824244588826
--------------
Overall:
br : 0.7639999985694885 , stdev: 0.03165569751743825
fnr : 0.14009544372558594 , stdev: 0.027278728186839536
fpr : 0.5374803793430328 , stdev: 0.07762456282127729
accuracy : 0.7396 , stdev: 0.029187611527267305
f1 : 0.6677099997961802 , stdev: 0.03630704769980525
--------------
Disparity: 0.12220436334609985


In [None]:
# Report the results obtained with the novel loss: per-group and overall
# statistics over all runs, followed by the average disparity.

print("Novel numbers!!!!")

for section_title, section_metrics in (("Group 0:", novel_arr_0), ("Group 1:", novel_arr_1), ("Overall:", novel_arr)):
  print("--------------")
  print(section_title)
  get_statistics(section_metrics, num_run)

print("--------------")
print("Disparity:", (novel_disp/num_run).item())

Novel numbers!!!!
--------------
Group 0:
br : 0.9031155681610108 , stdev: 0.08924576955981553
fnr : 0.04981471955776215 , stdev: 0.04916791638756049
fpr : 0.8449916684627533 , stdev: 0.15451806130973783
accuracy : 0.6181817076232621 , stdev: 0.04363706702372939
f1 : 0.4837107101599088 , stdev: 0.10013821194097207
--------------
Group 1:
br : 0.9045552444458008 , stdev: 0.051576294413663654
fnr : 0.04803183261305094 , stdev: 0.029113755665691115
fpr : 0.7741996884346009 , stdev: 0.13392539792896235
accuracy : 0.7537771638394735 , stdev: 0.03460796821533544
f1 : 0.5809170716366444 , stdev: 0.0839931326175424
--------------
Overall:
br : 0.9046000051498413 , stdev: 0.04993579275653168
fnr : 0.04805483534932137 , stdev: 0.028873872370064023
fpr : 0.7907482814788819 , stdev: 0.11699862960649982
accuracy : 0.7278 , stdev: 0.03428070399899434
f1 : 0.5648492988232319 , stdev: 0.07786238844957818
--------------
Disparity: 0.06792286783456802
