# AIF 360 Adult Dataset analysis with cross-validation

#### Uncomment cell bellow if libraries need to be installed. 

In [None]:
# !pip install aif360
# !pip install fairlearn
# !pip install tensorflow

# AdultDataset 



In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from aif360.datasets import AdultDataset
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.metrics import ClassificationMetric 
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC

np.random.seed(42) # Choose random seed to replicate the results


#### Uncomment cell below for entering the folder in which the required files are stored. 

In [None]:
# cd /usr/local/lib/python3.7/dist-packages/aif360/data/raw/adult

/usr/local/lib/python3.7/dist-packages/aif360/data/raw/adult


#### Uncomment cell below for downloading the required files. 

In [None]:
# !wget https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data
# !wget https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test
# !wget https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names

In [None]:
# Representation of a standard ML classification

dataset_orig = load_preproc_data_adult(['sex']) # Load adult dataset as a pre-set class made by AIF360

# Assign privileged and unprivileged classes. 
priv = [{'sex': 1}]
unpriv = [{'sex': 0}]

# Split the dataset into train and test. 
train, test = dataset_orig.split([0.7], shuffle=True)
# Standard scaler for one-hot encoding. 
scaler = MinMaxScaler()
# Assign train and test data to be predicted and compared to expected outputs. 
X_train = scaler.fit_transform(train.features)
y_train = train.labels.ravel()
X_test = scaler.transform(test.features) 
y_test = test.labels.ravel()
# Choose an example of a classifier. In this example, Logistic Regression is used. 
clf = LogisticRegression(solver='liblinear', C = 0.5, random_state=1)  
# Train the classifier. 
clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
test_pred = test.copy()
test_pred.labels = predictions
# Test the accuracy by comparing predictions with test data. 
acc = sum(predictions==y_test)/len(y_test)
print (acc)


0.8039991810550741


# From the start

In [None]:


ds = AdultDataset()
dataset_orig = load_preproc_data_adult(['sex'])
priv = [{'sex': 1}]
unpriv = [{'sex': 0}]
train, test = dataset_orig.split([0.7], shuffle=True)
scaler = MinMaxScaler()




#### The function below sets the way in which the tradeoff between accuracy and fairness metrics is computed. This will be used later for illustrating the efficiency of reweighting. 

In [None]:
def return_af_tradeoff(acc, eo):
    ftd = (1 - abs(eo) + acc)/2
    return ftd


In [None]:
# Slit the training dataset into 5 folds. 
def ret_folds(train, k = 5):
    f1, f2, f3, f4, f5 = train.split(k, shuffle = True)
    folds = [f1, f2, f3, f4, f5]
    return folds

# Evaluate each sub-set individually and return the output values. 
def eval_k_fold(train, clf, rw=None,k=5):
    
    if rw: 
        train = rw.fit_transform(train)
        
    train_folds = ret_folds(train)
    averages = [0, 0, 0]
    eq_opps = []
    tradeoffs = []
    accuracies = []
    true_ys = []
    y_preds = []
    metrics = {}
    k_test = train_folds[0]
    
    for i in range(k-1, -1, -1):
        for j in range(k-1):
            if j == 0:
                X_train = scaler.fit_transform(train_folds[j].features)
                y_train = train_folds[j].labels.ravel()
                sw = train_folds[j].instance_weights
                true_ys = np.concatenate((y_train, true_ys))
            else:
                if j == i:
                    pass
                else:
                    X_train = np.concatenate((X_train, scaler.fit_transform(train_folds[j].features)))
                    y_train = np.concatenate((y_train, train_folds[j].labels.ravel()))
                    sw = np.concatenate((sw, train_folds[j].instance_weights))
                    true_ys = np.concatenate((true_ys, train_folds[j].labels.ravel()))
        
        X_test = scaler.transform(k_test.features)
        y_test = k_test.labels.ravel()
        clf.fit(X_train, y_train, sample_weight = sw)
        predictions = clf.predict(k_test.features)
        test_pred = k_test.copy()
        test_pred.labels = predictions
        y_preds = np.concatenate((y_preds, test_pred.labels))
        
        metric = ClassificationMetric(k_test, test_pred, unprivileged_groups=unpriv, privileged_groups=priv)
        eo = (metric.equal_opportunity_difference())
        metrics["Fold " + str(i+1) + " metrics"] = {"eq_opp_diff": eo}
        metrics["Fold " + str(i+1) + " metrics"]["accuracy"] = acc = sum(predictions==k_test.labels.ravel())/len(k_test.labels.ravel())
        metrics["Fold " + str(i+1) + " metrics"]["tradeoff"] = return_af_tradeoff(acc, eo)
        
        eq_opps = np.append(eq_opps, eo)
        accuracies = np.append(accuracies, acc)
        tradeoffs = np.append(tradeoffs, return_af_tradeoff(acc, eo))
        
        averages[0] += sum(predictions==k_test.labels.ravel())/len(k_test.labels.ravel())
        averages[1] += (metric.equal_opportunity_difference())
        averages[2] += return_af_tradeoff(acc, eo)
        
        k_test = train_folds[i]
        
    averages = {'mean_acc': averages[0]/k, 'avg_tradeoff': averages[2]/k, 'mean_eq_opp': averages[1]/k}  # Get the average of all the folds.
    ys = {'predicted_ys' : y_preds, 'true_ys' : true_ys}

        
    return metrics, averages, ys, eq_opps
        

In [None]:
# Evaluate Logistic Regression with various parameters and solvers. 
def eval_log_reg(train, silence_print = False, rw = None):
    eo_avgs = []
    acc_avgs = []
    tradeoff_avgs=[]
    models = {}
    
    for solver in logreg_solvers:
        for c in C:
            metrics, averages, ys, eq_opps = eval_k_fold(train, clf = LogisticRegression(solver = solver, C = c), rw = rw)
            models[f'{solver}_LogReg_{c}'] = {'averages':averages}
            models[f'{solver}_LogReg_{c}']['metrics'] = metrics
            models[f'{solver}_LogReg_{c}']['ys'] = ys
            models[f'{solver}_LogReg_{c}']['eq_opps'] = eq_opps
            
            if not silence_print:
                print (f"Results for LogReg with C = {c}, and solver = {solver} : \n \n Averages: {averages}")
                print ("\n")
    return models

# Evaluate Support Vector Classifier with various parameters and kernels. 
def eval_svc(train, silence_print = False, rw = None):
    eo_avgs = []
    acc_avgs = []
    tradeoff_avgs=[]
    models = {}
    
    for kernel in svm_kernels:
        for c in C:
            metrics, averages, ys, eq_opps = eval_k_fold(train, clf = SVC(kernel = kernel, C = c, gamma = g), rw = rw)
            models[f'{kernel}_SVC, c:{c}'] = {'averages':averages}
            models[f'{kernel}_SVC, c:{c}']['metrics'] = metrics
            models[f'{kernel}_SVC, c:{c}']['ys'] = ys
            models[f'{kernel}_SVC, c:{c}']['eq_opps'] = eq_opps

            if not silence_print:
                print (f"Results for SVC with C = {c}, gamma = {g} and kernel = {kernel} : \n \n Averages: {averages}")
                print ("\n")
    return models            

In [None]:
# Assign the different parameters for Logistic Regression and SVC. 

C = [0.00001, 0.002, 0.1, 0.5, 1, 2, 5]
logreg_solvers = ['newton-cg', 'liblinear']
g = 'auto'
svm_kernels = ['linear', 'poly', 'rbf', 'sigmoid']

svc_models = eval_svc(train)
logreg_models = eval_log_reg(train)

Results for SVC with C = 1e-05, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7611803974408813, 'avg_tradeoff': 0.8805901987204408, 'mean_eq_opp': 0.0}


Results for SVC with C = 0.002, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7611804658880206, 'avg_tradeoff': 0.8805902329440103, 'mean_eq_opp': 0.0}


Results for SVC with C = 0.1, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7877387601989981, 'avg_tradeoff': 0.8751632114582095, 'mean_eq_opp': 0.0014299823745114182}


Results for SVC with C = 0.5, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7877387259754284, 'avg_tradeoff': 0.8830924821313193, 'mean_eq_opp': 0.0017860368592069408}


Results for SVC with C = 1, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7877388029784601, 'avg_tradeoff': 0.8808220023333053, 'mean_eq_opp': 0.0022054832781169932}


Results for SVC with C = 2, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.78

In [None]:
# Select the best classifier based on the final tradeoff value. 
def select_best(models):
    best = 0
    best_key = ''
    for key in models.keys():
        if best < models[key]['averages']['avg_tradeoff']:
            best = models[key]['averages']['avg_tradeoff']
            best_key = key
        else:
            pass
    return best_key, best

# Select the classifier with the highest fairness metrics. 
def select_fair(models):
    best = 0
    best_key = ''
    for key in models.keys():
        if best < (1 - (abs(models[key]['averages']['mean_eq_opp']))):
            best = models[key]['averages']['mean_eq_opp']
            best_key = key
        else:
            pass
    return best_key, best

# Select the classifier with the highest accuracy. 
def select_acc(models):
    best = 0
    best_key = ''
    for key in models.keys():
        if best < models[key]['averages']['mean_acc']:
            best = models[key]['averages']['mean_acc']
            best_key = key
        else:
            pass
    return best_key, best

# Compare and choose the better classifier based on a specific comparison criteria. 
def better(lg, svc, lg_val, svc_val):
    if lg_val > svc_val:
        winner = lg
    elif lg_val < svc_val:
        winner = svc

    return winner 

In [None]:
svc_best, svc_val = select_best(svc_models)
lg_best, lg_val = select_best(logreg_models)

svc_fair, svc_fair_val = select_fair(svc_models)
lg_fair, lg_fair_val = select_fair(logreg_models)

svc_acc, svc_acc_val = select_acc(svc_models)
lg_acc, lg_acc_val = select_acc(logreg_models)

print (better(lg_fair, svc_fair, lg_fair_val, svc_fair_val))
print (better(lg_acc, svc_acc, lg_acc_val, svc_acc_val))
print (better(lg_best, svc_best, lg_val, svc_val))

sigmoid_SVC, c:5
liblinear_LogReg_0.1
linear_SVC, c:5


In [None]:
# A function for testing the selected model in a standard scenario. 
def test_selected_model(train, test, clf = None):
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(train.features)
    y_train = train.labels.ravel()
    X_test = scaler.transform(test.features)
    clf = clf
    clf.fit(X_train,y_train)
    predictions = clf.predict(X_test)
    test_pred = test.copy()
    test_pred.labels = predictions
    acc = sum(predictions==y_test)/len(y_test)
    metric = ClassificationMetric(test, test_pred, unprivileged_groups=unpriv, privileged_groups=priv)
    eo = (metric.equal_opportunity_difference())
    tradeoff = return_af_tradeoff(acc, eo)
    return acc, eo, tradeoff

In [None]:
best_standard_adult = test_selected_model(train, test, clf = SVC(kernel = 'linear', C =5, gamma='auto'))
fair_standard_adult = test_selected_model(train, test, clf = SVC(kernel = 'sigmoid', C =5, gamma='auto'))
accurate_standard_adult = test_selected_model(train, test, clf = LogisticRegression(solver = 'newton-cg', C =2))

### Applying reweighting method

In [None]:
train, test = dataset_orig.split([0.7], shuffle=True)
rw = Reweighing(unprivileged_groups=unpriv,
                privileged_groups=priv)

rw_svc_models = eval_svc(train, silence_print = False, rw = rw) 
rw_logreg_models = eval_log_reg(train, silence_print = False, rw= rw)

Results for SVC with C = 1e-05, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7580216090185609, 'avg_tradeoff': 0.8790108045092804, 'mean_eq_opp': 0.0}


Results for SVC with C = 0.002, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7580216988554312, 'avg_tradeoff': 0.8790108494277156, 'mean_eq_opp': 0.0}


Results for SVC with C = 0.1, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7849016177181922, 'avg_tradeoff': 0.8804105423868108, 'mean_eq_opp': 0.01228580340226818}


Results for SVC with C = 0.5, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7849893626728573, 'avg_tradeoff': 0.8811219391355507, 'mean_eq_opp': 0.012076290266419376}


Results for SVC with C = 1, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.7849895166789208, 'avg_tradeoff': 0.8802325714290671, 'mean_eq_opp': 0.012602127956186027}


Results for SVC with C = 2, gamma = auto and kernel = linear : 
 
 Averages: {'mean_acc': 0.784989

In [None]:
svc_acc, svc_acc_val = select_acc(rw_svc_models)
lg_acc, lg_acc_val = select_acc(rw_logreg_models)

svc_fair, svc_fair_val = select_fair(rw_svc_models)
lg_fair, lg_fair_val = select_fair(rw_logreg_models)

svc_best, svc_best_val = select_best(rw_svc_models)
lg_best, lg_best_val = select_best(rw_logreg_models)

print (better(lg_acc, svc_acc, lg_acc_val, svc_acc_val))
print (better(lg_fair, svc_fair, lg_fair_val, svc_fair_val))
print (better(lg_best, svc_best, lg_best_val, svc_best_val))

poly_SVC, c:1
sigmoid_SVC, c:5
liblinear_LogReg_0.5


In [None]:
accurate_rw_adult = test_selected_model(train, test, clf = SVC(kernel = 'poly', C =1, gamma = 'auto'))
fair_rw_adult = test_selected_model(train, test, clf = SVC(kernel = 'sigmoid', C =5, gamma='auto'))
best_rw_adult = test_selected_model(train, test, clf = SVC(kernel = 'poly', C =5, gamma='auto'))



# Create a table with the final results of the classifiers with the best performance on accuracy, fairness, and tradeoff between the two. 

In [None]:
final_reports_adult = {'Standard_Fair': {'Sigmoid_SVC, C = 5':fair_standard_adult}, 'Standard_Accurate': {'poly_SVC, C = 1': accurate_standard_adult}, 'Standard_Best':{'Poly_SVC, C = 5':best_standard_adult} , 'RW_Fair':{'Sigmoid_SVC, C = 5':fair_rw_adult} , 'RW_Accurate':{'Poly_SVC, C = 5':accurate_rw_adult} , 'RW_Best':{'Poly_SVC, C = 5':best_rw_adult} }


In [None]:
df_adult = pd.DataFrame.from_dict(final_reports_adult)

In [None]:
df_adult.T

Unnamed: 0,"Sigmoid_SVC, C = 5","poly_SVC, C = 1","Poly_SVC, C = 5"
Standard_Fair,"(0.6621169726335904, 0.005542571533930618, 0.8...",,
Standard_Accurate,,"(0.6860028663072408, -0.4581099195710456, 0.61...",
Standard_Best,,,"(0.6538592779635569, 0.020348924125696444, 0.8..."
RW_Fair,"(0.6714665938715622, -0.2771732207701096, 0.69...",,
RW_Accurate,,,"(0.6867535658226984, -0.4557752341311134, 0.61..."
RW_Best,,,"(0.6822493687299529, -0.46999653139091224, 0.6..."
