In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.utils import data

from tqdm.notebook import tqdm_notebook

import warnings
warnings.filterwarnings('ignore')

# Select the Apple-silicon GPU backend only when it is usable at runtime.
# `torch.has_mps` merely reports whether this PyTorch build was compiled with
# MPS support (and is deprecated); `torch.backends.mps.is_available()` also
# checks that the current machine can actually run on an MPS device.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)
from itertools import product
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from aif360.algorithms.postprocessing import (CalibratedEqOddsPostprocessing,
                                              EqOddsPostprocessing,
                                              RejectOptionClassification)
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
random_state = 1
from sklearn.linear_model import LogisticRegression


mps


pip install 'aif360[AdversarialDebiasing]'


In [2]:
# Load the saved embedding and label tensors (moved to CPU) together with the
# accompanying metadata table.
X = torch.load('inputs/rfw_resnet50_face_embeddings.pt').cpu()
y = torch.load('inputs/rfw_resnet50_labels.pt').cpu()

# Normalise the reference ethnicity to lower case while reading the CSV so the
# string comparisons made later in the notebook can use lower-case literals.
df = pd.read_csv('inputs/rfw_resnet50_df.csv').assign(
    reference_ethnicity=lambda frame: frame['reference_ethnicity'].str.lower()
)
ethnicity = df['reference_ethnicity']
df

Unnamed: 0,reference_identity,candidate_identity,reference_ethnicity,candidate_ethnicity,labels
0,m.0c7mh2,m.0c7mh2,african,African,1.0
1,m.0c7mh2,m.0c7mh2,african,African,1.0
2,m.026tq86,m.026tq86,african,African,1.0
3,m.026tq86,m.026tq86,african,African,1.0
4,m.02wz3nc,m.02wz3nc,african,African,1.0
...,...,...,...,...,...
29311,m.0402tg,m.01npnk3,caucasian,Caucasian,0.0
29312,m.05pbbnj,m.02rrb2n,caucasian,Caucasian,0.0
29313,m.09j6df,m.07kcsqd,african,African,0.0
29314,m.0fhrbz,m.025zgjt,african,African,0.0


In [3]:
# Split the sample indices 60/20/20 into train/validation/test: 20% is held
# out for test first, then 25% of the remaining 80% becomes validation.
train_split, test_split = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=random_state
)
train_split, val_split = train_test_split(
    train_split, test_size=0.25, random_state=random_state
)

X_train, X_val, X_test = X[train_split], X[val_split], X[test_split]
y_train, y_val, y_test = y[train_split], y[val_split], y[test_split]

# Integer code assigned to each reference ethnicity.
ETHNICITY_CODES = {'caucasian': 0, 'african': 1}


def _encode_ethnicity(split_indices):
    """Return the integer-coded ethnicity for the rows at ``split_indices``.

    Parameters
    ----------
    split_indices : array-like of int
        Positional row indices into ``ethnicity``.

    Returns
    -------
    numpy.ndarray
        Integer array with 0 for 'caucasian' and 1 for 'african'.

    Raises
    ------
    ValueError
        If a selected row holds a value that is not in ``ETHNICITY_CODES``.
    """
    # Positional lookup, matching how the split indices are used with
    # ``df.iloc`` elsewhere in the notebook.
    selected = ethnicity.iloc[split_indices]
    codes = selected.map(ETHNICITY_CODES)
    unmapped = codes.isna()
    if unmapped.any():
        unknown = sorted(selected[unmapped].astype(str).unique())
        raise ValueError(
            f'Unexpected ethnicity value(s) {unknown}; '
            f'expected one of {sorted(ETHNICITY_CODES)}'
        )
    return codes.astype(int).to_numpy()


ethnicity_train = _encode_ethnicity(train_split)
ethnicity_val = _encode_ethnicity(val_split)
ethnicity_test = _encode_ethnicity(test_split)

In [4]:
# Materialise each split as an independent copy of the metadata rows. Without
# the copy these frames are slices of `df`, and adding a column to one of them
# later is a chained assignment (SettingWithCopyWarning, hidden here because
# warnings are globally ignored).
train_df = df.iloc[train_split].copy()
val_df = df.iloc[val_split].copy()
test_df = df.iloc[test_split].copy()
test_df

Unnamed: 0,reference_identity,candidate_identity,reference_ethnicity,candidate_ethnicity,labels
18904,m.0cx09_,m.02q_nsj,caucasian,Caucasian,0.0
28620,m.080gs5b,m.027w0nt,african,African,0.0
11687,m.027q3qg,m.027q3qg,caucasian,Caucasian,1.0
27477,m.034s_j,m.03h3058,african,African,0.0
12283,m.05p7_tb,m.05p7_tb,caucasian,Caucasian,1.0
...,...,...,...,...,...
18134,m.0hzpkcs,m.067fmb,caucasian,Caucasian,0.0
3639,m.05lnvt,m.05lnvt,african,African,1.0
11091,m.01ndxh,m.01ndxh,caucasian,Caucasian,1.0
9369,m.0c2g8y,m.0c2g8y,caucasian,Caucasian,1.0


In [5]:
def confusion_mat(y_pred, y_test):
    """Return binary confusion-matrix counts and the accuracy.

    Note the argument order: predictions first, ground truth second (the
    reverse of ``sklearn.metrics.confusion_matrix``).

    Parameters
    ----------
    y_pred : array-like
        Predicted binary labels (0/1).
    y_test : array-like
        Ground-truth binary labels (0/1).

    Returns
    -------
    tuple
        ``(tn, fp, fn, tp, acc)`` where ``acc = (tn + tp) / total``.
    """
    # Pin the label set so the matrix is always 2x2. Without it, a group in
    # which only one class occurs yields a 1x1 matrix and the four-way unpack
    # below raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    acc = (tn + tp)/(tn+tp+fn+fp)
    return tn, fp, fn, tp, acc
    
def AOE(tn_1,fp_1,fn_1,tp_1,tn_0,fp_0,fn_0,tp_0):
    """Average odds error between two groups.

    Takes the confusion-matrix counts of group 1 and group 0 and returns the
    mean of the absolute false-positive-rate gap and the absolute
    true-positive-rate gap between them.
    """
    def _share(hits, others):
        # Fraction of `hits` among `hits + others`.
        return hits/(hits+others)

    fpr_gap = np.abs(_share(fp_1, tn_1) - _share(fp_0, tn_0))
    tpr_gap = np.abs(_share(tp_1, fn_1) - _share(tp_0, fn_0))
    return (fpr_gap + tpr_gap)/2

In [6]:
# Fit a logistic-regression classifier on the training split. `fit` returns
# the estimator itself, so construction and fitting can be chained; the bare
# name on the last line keeps the estimator repr as the cell output.
logisticRegr = LogisticRegression(solver = 'lbfgs').fit(X_train, y_train)
logisticRegr

LogisticRegression()

In [7]:
# Predict on the validation split and attach the predictions to the
# validation metadata. `assign` builds a new frame rather than writing a
# column into `val_df` in place, which avoids a chained assignment on a slice
# of `df` and keeps the cell idempotent when re-run.
val_predictions = np.asarray(logisticRegr.predict(X_val)).flatten()
val_df = val_df.assign(predictions=val_predictions)
val_df


Unnamed: 0,reference_identity,candidate_identity,reference_ethnicity,candidate_ethnicity,labels,predictions
14342,m.026j908,m.026j908,caucasian,Caucasian,1.0,1.0
253,m.0q3yxb2,m.0q3yxb2,african,African,1.0,0.0
19218,m.0bct0h,m.03qg12,caucasian,Caucasian,0.0,1.0
16461,m.08xxx9,m.0cz95jf,african,African,0.0,0.0
16869,m.0d88dr,m.05zcmc,african,African,0.0,1.0
...,...,...,...,...,...,...
11980,m.03vq05,m.03vq05,caucasian,Caucasian,1.0,0.0
12825,m.0gcr4zx,m.0gcr4zx,caucasian,Caucasian,1.0,1.0
21881,m.0b_c03,m.0278nmt,caucasian,Caucasian,0.0,0.0
1396,m.01t4wm,m.01t4wm,african,African,1.0,0.0


In [8]:
def _group_rates(tn, fp, fn, tp):
    """Return (TNR, TPR, FNR, FPR, balanced accuracy) from confusion counts."""
    tnr = tn/(tn+fp)
    tpr = tp/(tp+fn)
    fnr = fn/(fn+tp)
    fpr = fp/(fp+tn)
    balanced_accuracy = (tpr + tnr)/2
    return tnr, tpr, fnr, fpr, balanced_accuracy


def _print_group_report(group, tnr, tpr, fnr, fpr, acc, balanced_accuracy):
    """Print the three-line rate summary for one group."""
    print(f'{group} TNR: {tnr:.3f} | {group} TPR: {tpr:.3f} ')
    print(f'{group} FNR: {fnr:.3f} | {group} FPR: {fpr:.3f} ')
    print(f'{group} ACC: {acc:.3f} | {group} balanced ACC: {balanced_accuracy:.3f} ')


# Partition the validation rows by reference ethnicity.
caucasian_val_df = val_df[val_df['reference_ethnicity']=='caucasian']
african_val_df = val_df[val_df['reference_ethnicity']=='african']

caucasian_val_preds = caucasian_val_df.predictions
african_val_preds = african_val_df.predictions

caucasian_val_labels = caucasian_val_df.labels
african_val_labels = african_val_df.labels

# Confusion counts and plain accuracy per group.
caucasian_tn, caucasian_fp, caucasian_fn, caucasian_tp, caucasian_acc = confusion_mat(caucasian_val_preds,caucasian_val_labels)
african_tn, african_fp, african_fn, african_tp, african_acc = confusion_mat(african_val_preds,african_val_labels)

# Derive each rate once and reuse the stored values for printing, so the
# reported numbers cannot drift from the variables used later.
(caucasian_tnr, caucasian_tpr, caucasian_fnr, caucasian_fpr,
 caucasian_balanced_accuracy) = _group_rates(caucasian_tn, caucasian_fp, caucasian_fn, caucasian_tp)
(african_tnr, african_tpr, african_fnr, african_fpr,
 african_balanced_accuracy) = _group_rates(african_tn, african_fp, african_fn, african_tp)

_print_group_report('caucasian', caucasian_tnr, caucasian_tpr, caucasian_fnr,
                    caucasian_fpr, caucasian_acc, caucasian_balanced_accuracy)
_print_group_report('african', african_tnr, african_tpr, african_fnr,
                    african_fpr, african_acc, african_balanced_accuracy)

caucasian TNR: 0.469 | caucasian TPR: 0.430 
caucasian FNR: 0.570 | caucasian FPR: 0.531 
caucasian ACC: 0.450 | caucasian balanced ACC: 0.450 
african TNR: 0.472 | african TPR: 0.459 
african FNR: 0.541 | african FPR: 0.528 
african ACC: 0.466 | african balanced ACC: 0.466 
