In [None]:
# Colab-only setup: mount Google Drive at /content/drive (forcing a remount if it is already mounted).
import os, sys
from google.colab import drive
drive.mount('/content/drive', force_remount=True)
# Disabled alternative: symlink a Drive folder and put it on sys.path so packages can be installed there.
# nb_path = '/content/notebooks'
# os.symlink('/content/drive/My Drive/Colab Notebooks', nb_path)
# sys.path.insert(0, nb_path)

In [None]:
#!pip install --target=$nb_path aif360[all]
!pip install aif360[all]
#also the cw will make use two aif360 datasets: adult and german

In [None]:
# Resolve the raw-data folder from the installed aif360 package rather than a
# hard-coded ".../python3.7/dist-packages/..." path, which breaks on other Python versions.
import aif360
print(os.getcwd())
os.chdir(os.path.join(os.path.dirname(aif360.__file__), 'data', 'raw', 'adult'))
print(os.getcwd())

In [None]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test

In [None]:
# Resolve the raw-data folder from the installed aif360 package rather than a
# hard-coded ".../python3.7/dist-packages/..." path, which breaks on other Python versions.
import aif360
print(os.getcwd())
os.chdir(os.path.join(os.path.dirname(aif360.__file__), 'data', 'raw', 'german'))
print(os.getcwd())

In [None]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/german/german.data
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/german/german.data-numeric
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/german/german.doc

In [None]:
# Switch the working directory to the coursework folder on the mounted Drive;
# files written later with relative paths (e.g. the saved plots) will land here.
print(os.getcwd())
os.chdir(r'/content/drive/MyDrive/cw2')
print(os.getcwd())

In [None]:
from collections import OrderedDict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from aif360.datasets import AdultDataset, GermanDataset
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult, load_preproc_data_german
# from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing
# from aif360.algorithms.postprocessing.reject_option_classification\
#         import RejectOptionClassification
from aif360.algorithms.postprocessing import EqOddsPostprocessing

from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.metrics import ClassificationMetric
from aif360.metrics import BinaryLabelDatasetMetric
# from aif360.sklearn.metrics import disparate_impact_ratio
# from aif360.sklearn.metrics import make_scorer
from aif360.algorithms.preprocessing.reweighing import Reweighing

import pdb

from sklearn.preprocessing import StandardScaler  #MinMaxScaler
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

# from sklearn.model_selection import *

In [None]:
# Hyperparameters held fixed during the search: SOLVER is passed to the 'lr' estimator,
# MAX_ITER to the 'svm' estimator; only C is varied.
SOLVER = 'liblinear'
MAX_ITER = 10000

In [None]:
def to_dataframes(aif360_ds):
    """Convert a dataset object into a pandas feature frame and label series.

    Args:
        aif360_ds: object exposing ``features``, ``feature_names``, ``labels``
            and ``label_names`` attributes.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a DataFrame whose columns are
        ``feature_names`` and ``y`` is a Series of the flattened labels named
        after the first entry of ``label_names``.
    """
    feature_frame = pd.DataFrame(data=aif360_ds.features,
                                 columns=aif360_ds.feature_names)
    flat_labels = aif360_ds.labels.ravel()
    label_series = pd.Series(data=flat_labels, name=aif360_ds.label_names[0])
    return feature_frame, label_series

def compare_lsts(l1, l2):
    """Return True when both sequences have the same length and equal items position by position."""
    pairwise_equal = all(left == right for left, right in zip(l1, l2))
    if not pairwise_equal:
        return False
    # zip stops at the shorter input, so a length check is still required.
    return len(l1) == len(l2)

def sanity_check_before_nsf(dataset):
    """Check that ``to_dataframes`` preserves the dataset's features and labels.

    The dataset is converted once and the resulting arrays are compared with
    ``dataset.features`` and ``dataset.labels`` (flattened).

    Args:
        dataset: object accepted by ``to_dataframes`` that exposes ``features``
            and ``labels`` arrays.

    Returns:
        None

    Raises:
        AssertionError: if the row counts or any feature/label value differ.
    """
    # Convert once instead of once per output.
    X_df, y_series = to_dataframes(dataset)
    X_roundtrip = X_df.to_numpy()
    y_roundtrip = y_series.to_numpy()
    X_orig = dataset.features
    y_orig = dataset.labels

    assert len(X_roundtrip) == len(X_orig) and len(y_roundtrip) == len(y_orig), \
        "row count changed during to_dataframes conversion"
    # Vectorized comparisons replace the per-row / per-label Python loops.
    assert np.array_equal(X_roundtrip, X_orig), \
        "feature values changed during to_dataframes conversion"
    # to_dataframes flattens the labels, so flatten the originals before comparing.
    assert np.array_equal(y_roundtrip, np.ravel(y_orig)), \
        "label values changed during to_dataframes conversion"

    return None

def train_model(model, dataset, sensitive_attr=None):
    """Fit ``model`` on the standardized features of ``dataset`` and return it.

    Args:
        model: estimator exposing ``fit(X, y)``.
        dataset: object exposing ``features`` and ``labels`` (and whatever
            ``to_dataframes`` needs when ``sensitive_attr`` is given).
        sensitive_attr: column name(s) to drop from the features before
            training; ``None`` keeps every feature.

    Returns:
        The same ``model`` instance after fitting.
    """
    if sensitive_attr is not None:
        # Remove the sensitive column(s) by name, then go back to a plain array.
        feature_frame, _ = to_dataframes(dataset)
        raw_features = feature_frame.drop(columns=sensitive_attr).to_numpy()
    else:
        raw_features = dataset.features

    standardized = StandardScaler().fit_transform(raw_features)
    targets = dataset.labels.ravel()
    model.fit(standardized, targets)
    return model

def evaluate(model, dataset, unprivileged_groups, privileged_groups, sensitive_attr=None):
    """Score a fitted model on ``dataset`` for accuracy and group fairness.

    Args:
        model: fitted estimator exposing ``predict(X)``.
        dataset: labelled dataset to evaluate on.
        unprivileged_groups: group definition forwarded to ``ClassificationMetric``.
        privileged_groups: group definition forwarded to ``ClassificationMetric``.
        sensitive_attr: column name(s) dropped from the features before
            predicting; ``None`` keeps every feature.

    Returns:
        dict with keys ``'accuracy'``, ``'eq_opp_diff'`` and ``'avr_odds_diff'``.
    """
    # NOTE(review): a fresh scaler is fitted on the dataset being evaluated, not
    # reused from training - confirm this is intended before relying on the scores.
    scaler = StandardScaler()
    if sensitive_attr is None:
        X = scaler.fit_transform(dataset.features)
    else:
        # X_nsf is the input features with no sensitive features
        X_nsf = to_dataframes(dataset)[0].drop(columns=sensitive_attr).to_numpy()
        X = scaler.fit_transform(X_nsf)

    y = dataset.labels.ravel()
    predictions = model.predict(X)
    accuracy = accuracy_score(y, predictions)

    dataset_pred = dataset.copy()
    # Store predictions as an (n, 1) column, the same layout evaluate_w_reweigh
    # and evaluate_w_eop use, instead of a bare 1-D vector.
    dataset_pred.labels = predictions.reshape(-1, 1)
    fairness_metrics = ClassificationMetric(dataset, dataset_pred,
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
    eq_opp_diff = fairness_metrics.equal_opportunity_difference()
    avr_odds_diff = fairness_metrics.average_odds_difference()
    return {'accuracy': accuracy, 'eq_opp_diff': eq_opp_diff, 'avr_odds_diff': avr_odds_diff}

def split_aif360_dataset(dataset, num_fold):
    """Shuffle ``dataset`` and split it into ``num_fold`` parts of (roughly) equal size.

    Args:
        dataset: object exposing ``split(ratios, shuffle=...)``.
        num_fold: number of parts to produce.

    Returns:
        Whatever ``dataset.split`` returns for the cumulative ratios
        ``1/num_fold, 2/num_fold, ..., (num_fold-1)/num_fold``.
    """
    # Compute each cut point directly. Repeatedly adding 1/num_fold accumulates
    # rounding error (e.g. 0.7999999999999999 instead of 0.8), which can shift a
    # cut by one sample if the ratios are later truncated to integer positions.
    ratio_lst = [fold / num_fold for fold in range(1, num_fold)]
    return dataset.split(ratio_lst, shuffle=True)

def get_s_ratio_lst(num_fold):
    """Return the cumulative split ratios for ``num_fold`` equally sized folds.

    Args:
        num_fold: total number of folds.

    Returns:
        list of ``num_fold - 1`` floats ``[1/num_fold, 2/num_fold, ...]``
        (empty when ``num_fold`` is 1 or less).
    """
    # Divide directly rather than summing 1/num_fold step by step, which drifts
    # (0.1 + 0.1 + 0.1 == 0.30000000000000004).
    return [fold / num_fold for fold in range(1, num_fold)]

def get_fold_idx_iterables(dataset, num_fold):
    """Partition the row indices of ``dataset`` into ``num_fold`` contiguous folds.

    Fold boundaries are computed with integer arithmetic
    (``n_samples * k // num_fold``). The previous float-ratio version could
    produce an empty fold through rounding error (10 samples in 10 folds gave
    ``range(7, 7)``) and returned a single fold for ``num_fold == 2``.

    Args:
        dataset: object whose ``features`` attribute has one entry per sample.
        num_fold: total number of folds, at least 1.

    Returns:
        list of ``num_fold`` lists of consecutive integer indices that together
        cover ``range(len(dataset.features))`` exactly once; the last fold
        absorbs any remainder.

    Raises:
        ValueError: if ``num_fold`` is smaller than 1.
    """
    if num_fold < 1:
        raise ValueError(f"num_fold must be at least 1, got {num_fold}")

    n_samples = len(dataset.features)
    # boundaries[k] is the first index of fold k; boundaries[-1] == n_samples.
    boundaries = [n_samples * k // num_fold for k in range(num_fold + 1)]
    return [list(range(start, stop))
            for start, stop in zip(boundaries[:-1], boundaries[1:])]


def combined_metric(acc, fairness):
    """Task 3 model-selection score: a weighted sum of accuracy and a fairness value.

    Accuracy is weighted by 1 and the fairness value by -1.5, so larger
    fairness values lower the score.
    """
    weights = {'accuracy': 1, 'fairness': -1.5}
    weighted_accuracy = weights['accuracy'] * acc
    weighted_fairness = weights['fairness'] * fairness
    return weighted_accuracy + weighted_fairness

def _plot_lr_vs_svm(ax, C_val_array, lr_values, svm_values, ylabel, title, fontsize):
    """Draw the lr (yellow) and svm (red) curves against C on a log-scaled x axis."""
    ax.scatter(C_val_array, lr_values, c='yellow')
    ax.scatter(C_val_array, svm_values, c='red')
    ax.plot(C_val_array, lr_values, color='yellow', label='lr')
    ax.plot(C_val_array, svm_values, color='red', label='svm')
    ax.legend()
    ax.grid()
    # set_xscale('log') defaults to base 10 and avoids the `basex` keyword of
    # semilogx, which newer Matplotlib releases no longer accept.
    ax.set_xscale('log')
    ax.set_xlabel("hyperparameter C in logscale with a logbase of 10", fontsize=fontsize)
    ax.set_ylabel(ylabel, fontsize=fontsize)
    ax.set_title(title, weight='bold')


def cv_results_vis(results, C_val_array, tasknum):
    """Visualize the model selection process and save the figures to disk.

    Two files are written to the current working directory:
    ``task_{tasknum}_model_selection.png`` (accuracy and equal-opportunity
    difference against C) and
    ``task_3_model_selection_with_task_{tasknum}_data.png`` (combined metric
    against C).

    Args:
        results: sequence of ``(model, accuracy, eq_opp_diff)`` rows. The first
            half is treated as the lr rows and the second half as the svm rows,
            so both models are assumed to share the same number of C values.
        C_val_array: C values used as the x axis for both models.
        tasknum: task number embedded in the output file names.

    Returns:
        None
    """
    half = len(results) // 2
    lr_rows, svm_rows = results[:half], results[half:]

    lr_acc_results = [acc for _, acc, _ in lr_rows]
    lr_eq_opp_diff_results = [eq_opp_diff for _, _, eq_opp_diff in lr_rows]
    svm_acc_results = [acc for _, acc, _ in svm_rows]
    svm_eq_opp_diff_results = [eq_opp_diff for _, _, eq_opp_diff in svm_rows]
    # The combined metric penalizes the magnitude of the fairness gap, hence abs().
    lr_combined_results = [combined_metric(acc, abs(fair))
                           for acc, fair in zip(lr_acc_results, lr_eq_opp_diff_results)]
    svm_combined_results = [combined_metric(acc, abs(fair))
                            for acc, fair in zip(svm_acc_results, svm_eq_opp_diff_results)]

    fig, axes = plt.subplots(2, 1, figsize=(14, 14))
    _plot_lr_vs_svm(axes[0], C_val_array, lr_acc_results, svm_acc_results,
                    ylabel="accuracy",
                    title="Plot of hyperparameter against accuracy metric",
                    fontsize=15)
    _plot_lr_vs_svm(axes[1], C_val_array, lr_eq_opp_diff_results, svm_eq_opp_diff_results,
                    ylabel="equality of opportunity difference",
                    title="Plot of hyperparameter against fairness metric",
                    fontsize=15)
    fig.tight_layout()
    # Save through the figure handle so the right figure is written regardless
    # of which one pyplot currently considers active.
    fig.savefig(f"task_{tasknum}_model_selection.png")
    plt.show()

    # also need to plot for the task 3 model selection at this stage
    task3_fig, task3_ax = plt.subplots(figsize=(13, 13))
    _plot_lr_vs_svm(task3_ax, C_val_array, lr_combined_results, svm_combined_results,
                    ylabel="combined accuracy and fairness metric",
                    title="Plot of hyperparameter against combined metric",
                    fontsize=14)
    task3_fig.savefig(f"task_3_model_selection_with_task_{tasknum}_data.png")
    plt.show()

    return None

def cross_validate_search(dataset, 
                   estimators, 
                   params, 
                   fold_num, 
                   unprivileged_groups, 
                   privileged_groups,
                   sensitive_attr=None,
                   visualization=True):
    """
    Perform multi-model cross validation and hyperparam search with fold_num number of folds

    For every fold, one logistic-regression model per value in
    ``params['lr']['lr_C']`` and one SVM per value in ``params['svm']['svm_C']``
    is trained with ``train_model`` on the remaining folds and scored with
    ``evaluate`` on the held-out fold. Scores are averaged over the folds.

    Args:
        dataset: dataset exposing ``features`` and ``subset(indices)``.
        estimators: mapping with ``'lr'`` and ``'svm'`` estimator factories.
        params: ``{'lr': {'lr_C': [...]}, 'svm': {'svm_C': [...]}}``.
        fold_num: number of cross-validation folds.
        unprivileged_groups: forwarded to ``evaluate``.
        privileged_groups: forwarded to ``evaluate``.
        sensitive_attr: column name(s) to drop from the features, or ``None``.
        visualization: when true, plot the results via ``cv_results_vis``.

    Returns:
        tuple of two DataFrames indexed by model label (``lr_C_<C>`` /
        ``svm_C_<C>``) with columns ``accuracy`` and ``eq_opp_diff``: the first
        sorted by accuracy (descending), the second by ``|eq_opp_diff|``
        (ascending).
    """
    idx_iters = get_fold_idx_iterables(dataset, fold_num)
    lr_C_vals = params['lr']['lr_C']
    svm_C_vals = params['svm']['svm_C']

    fold_accuracies = []
    fold_eq_opp_diffs = []
    for test_id in range(fold_num):
        test_idx_lst = idx_iters[test_id]
        train_idx_lst = [idx
                         for fold_id, fold in enumerate(idx_iters) if fold_id != test_id
                         for idx in fold]

        # now can get the test and train dataset under this fold
        test_dataset = dataset.subset(test_idx_lst)
        train_dataset = dataset.subset(train_idx_lst)

        # fresh models for this fold, lr first and svm second
        model_lst = (
            [estimators['lr'](C=C_val, solver=SOLVER, random_state=1) for C_val in lr_C_vals]
            + [estimators['svm'](C=C_val, max_iter=MAX_ITER, random_state=1) for C_val in svm_C_vals]
        )

        # sensitive_attr=None is the callee's default, so no branching is needed
        trained_model_lst = [train_model(model, train_dataset, sensitive_attr=sensitive_attr)
                             for model in model_lst]

        # Evaluate each model once and read both metrics from the same result
        # (previously every model was evaluated twice per fold).
        fold_metrics = [evaluate(model, test_dataset, unprivileged_groups, privileged_groups,
                                 sensitive_attr=sensitive_attr)
                        for model in trained_model_lst]
        fold_accuracies.append([metrics['accuracy'] for metrics in fold_metrics])
        fold_eq_opp_diffs.append([metrics['eq_opp_diff'] for metrics in fold_metrics])

    # get the average results from all folds
    accuracy_array = np.sum(np.array(fold_accuracies), axis=0) / fold_num
    eq_opp_diff_array = np.sum(np.array(fold_eq_opp_diffs), axis=0) / fold_num

    # Build the labels in the same lr-then-svm order as the score arrays rather
    # than relying on the iteration order of `params`.
    info_lst = ([f"lr_C_{np.round(C, 4)}" for C in lr_C_vals]
                + [f"svm_C_{np.round(C, 4)}" for C in svm_C_vals])

    results = [[model, accuracy, eq_opp_diff]
               for model, accuracy, eq_opp_diff in zip(info_lst, accuracy_array, eq_opp_diff_array)]

    # plot the C_vals against the acc and fairness results, for all models
    if visualization:
        cv_results_vis(results, lr_C_vals, 1)

    results_df = pd.DataFrame(results, columns=['model', 'accuracy', 'eq_opp_diff']).set_index('model')
    return results_df.sort_values(by='accuracy', ascending=False), \
           results_df.sort_values(by='eq_opp_diff', key=lambda col: col.abs())



In [None]:
#with reweighing
def train_model_w_reweigh(model, dataset, unprivileged_groups, privileged_groups, sensitive_attr=None):
    """Fit ``model`` on standardized features using Reweighing instance weights.

    Args:
        model: estimator exposing ``fit(X, y, sample_weight=...)``.
        dataset: training dataset exposing ``features`` and ``labels``.
        unprivileged_groups: group definition passed to ``Reweighing``.
        privileged_groups: group definition passed to ``Reweighing``.
        sensitive_attr: column name(s) to drop from the features before
            training; ``None`` keeps every feature.

    Returns:
        The same ``model`` instance after fitting.
    """
    if sensitive_attr is not None:
        # Remove the sensitive column(s) by name, then go back to a plain array.
        feature_frame, _ = to_dataframes(dataset)
        raw_features = feature_frame.drop(columns=sensitive_attr).to_numpy()
    else:
        raw_features = dataset.features

    standardized = StandardScaler().fit_transform(raw_features)
    targets = dataset.labels.ravel()

    # The reweighed dataset is only used for its per-instance weights.
    reweigher = Reweighing(unprivileged_groups=unprivileged_groups,
                           privileged_groups=privileged_groups)
    reweighed_dataset = reweigher.fit_transform(dataset)

    model.fit(standardized, targets, sample_weight=reweighed_dataset.instance_weights)
    return model

def evaluate_w_reweigh(model, dataset, unprivileged_groups, privileged_groups, sensitive_attr=None):
    """Score a model trained with reweighing on ``dataset``.

    The evaluation data itself is not reweighed; predictions are only reshaped
    into an (n, 1) column before being stored as the predicted labels.

    Args:
        model: fitted estimator exposing ``predict(X)``.
        dataset: labelled dataset to evaluate on.
        unprivileged_groups: group definition forwarded to ``ClassificationMetric``.
        privileged_groups: group definition forwarded to ``ClassificationMetric``.
        sensitive_attr: column name(s) dropped from the features before
            predicting; ``None`` keeps every feature.

    Returns:
        dict with keys ``'accuracy'``, ``'eq_opp_diff'`` and ``'avr_odds_diff'``.
    """
    # NOTE(review): a fresh scaler is fitted on the dataset being evaluated, not
    # reused from training - confirm this is intended before relying on the scores.
    scaler = StandardScaler()
    if sensitive_attr is None:
        X = scaler.fit_transform(dataset.features)
    else:
        # X_nsf is the input features with no sensitive features
        X_nsf = to_dataframes(dataset)[0].drop(columns=sensitive_attr).to_numpy()
        X = scaler.fit_transform(X_nsf)

    y = dataset.labels.ravel()
    predictions = model.predict(X)
    accuracy = accuracy_score(y, predictions)

    dataset_pred = dataset.copy()
    # reshape returns a new (n, 1) array/view; the in-place ndarray.resize used
    # before raises ValueError when the array does not own its data or is
    # referenced elsewhere.
    dataset_pred.labels = predictions.reshape(-1, 1)
    fairness_metrics = ClassificationMetric(dataset, dataset_pred,
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
    eq_opp_diff = fairness_metrics.equal_opportunity_difference()
    avr_odds_diff = fairness_metrics.average_odds_difference()
    return {'accuracy': accuracy, 'eq_opp_diff': eq_opp_diff, 'avr_odds_diff': avr_odds_diff}

def cross_validate_search_w_reweigh(dataset, 
                   estimators, 
                   params, 
                   fold_num, 
                   unprivileged_groups, 
                   privileged_groups,
                   sensitive_attr=None,
                   visualization=True):
    """
    Perform multi-model cross validation and hyperparam search with fold_num number of folds

    Same procedure as the plain search, but models are trained with
    ``train_model_w_reweigh`` and scored with ``evaluate_w_reweigh``.

    Args:
        dataset: dataset exposing ``features`` and ``subset(indices)``.
        estimators: mapping with ``'lr'`` and ``'svm'`` estimator factories.
        params: ``{'lr': {'lr_C': [...]}, 'svm': {'svm_C': [...]}}``.
        fold_num: number of cross-validation folds.
        unprivileged_groups: forwarded to training and evaluation.
        privileged_groups: forwarded to training and evaluation.
        sensitive_attr: column name(s) to drop from the features, or ``None``.
        visualization: when true, plot the results via ``cv_results_vis``.

    Returns:
        tuple of two DataFrames indexed by model label (``lr_C_<C>`` /
        ``svm_C_<C>``) with columns ``accuracy`` and ``eq_opp_diff``: the first
        sorted by accuracy (descending), the second by ``|eq_opp_diff|``
        (ascending).
    """
    idx_iters = get_fold_idx_iterables(dataset, fold_num)
    lr_C_vals = params['lr']['lr_C']
    svm_C_vals = params['svm']['svm_C']

    fold_accuracies = []
    fold_eq_opp_diffs = []
    for test_id in range(fold_num):
        test_idx_lst = idx_iters[test_id]
        train_idx_lst = [idx
                         for fold_id, fold in enumerate(idx_iters) if fold_id != test_id
                         for idx in fold]

        # now can get the test and train dataset under this fold
        test_dataset = dataset.subset(test_idx_lst)
        train_dataset = dataset.subset(train_idx_lst)

        # fresh models for this fold, lr first and svm second
        model_lst = (
            [estimators['lr'](C=C_val, solver=SOLVER, random_state=1) for C_val in lr_C_vals]
            + [estimators['svm'](C=C_val, max_iter=MAX_ITER, random_state=1) for C_val in svm_C_vals]
        )

        # sensitive_attr=None is the callee's default, so no branching is needed
        trained_model_lst = [train_model_w_reweigh(model, train_dataset, unprivileged_groups,
                                                   privileged_groups, sensitive_attr=sensitive_attr)
                             for model in model_lst]

        # Evaluate each model once and read both metrics from the same result
        # (previously every model was evaluated twice per fold).
        fold_metrics = [evaluate_w_reweigh(model, test_dataset, unprivileged_groups, privileged_groups,
                                           sensitive_attr=sensitive_attr)
                        for model in trained_model_lst]
        fold_accuracies.append([metrics['accuracy'] for metrics in fold_metrics])
        fold_eq_opp_diffs.append([metrics['eq_opp_diff'] for metrics in fold_metrics])

    # get the average results from all folds
    accuracy_array = np.sum(np.array(fold_accuracies), axis=0) / fold_num
    eq_opp_diff_array = np.sum(np.array(fold_eq_opp_diffs), axis=0) / fold_num

    # Build the labels in the same lr-then-svm order as the score arrays rather
    # than relying on the iteration order of `params`.
    info_lst = ([f"lr_C_{np.round(C, 4)}" for C in lr_C_vals]
                + [f"svm_C_{np.round(C, 4)}" for C in svm_C_vals])

    results = [[model, accuracy, eq_opp_diff]
               for model, accuracy, eq_opp_diff in zip(info_lst, accuracy_array, eq_opp_diff_array)]

    # plot the C_vals against the acc and fairness results, for all models
    if visualization:
        cv_results_vis(results, lr_C_vals, 2)

    results_df = pd.DataFrame(results, columns=['model', 'accuracy', 'eq_opp_diff']).set_index('model')
    return results_df.sort_values(by='accuracy', ascending=False), \
           results_df.sort_values(by='eq_opp_diff', key=lambda col: col.abs())


In [None]:
#with Equalized Odds Post-processing
def evaluate_w_eop(model, dataset, unprivileged_groups, privileged_groups, sensitive_attr=None):
    """Score a fitted model after Equalized Odds post-processing of its predictions.

    The model's predictions on ``dataset`` are passed through
    ``EqOddsPostprocessing`` (seed 0), and accuracy and fairness are computed
    on the post-processed labels.

    Args:
        model: fitted estimator exposing ``predict(X)``.
        dataset: labelled dataset to evaluate on.
        unprivileged_groups: group definition for post-processing and metrics.
        privileged_groups: group definition for post-processing and metrics.
        sensitive_attr: column name(s) dropped from the features before
            predicting; ``None`` keeps every feature.

    Returns:
        dict with keys ``'accuracy'``, ``'eq_opp_diff'`` and ``'avr_odds_diff'``.
    """
    # NOTE(review): a fresh scaler is fitted on the dataset being evaluated, not
    # reused from training - confirm this is intended before relying on the scores.
    scaler = StandardScaler()
    if sensitive_attr is None:
        X = scaler.fit_transform(dataset.features)
    else:
        # X_nsf is the input features with no sensitive features
        X_nsf = to_dataframes(dataset)[0].drop(columns=sensitive_attr).to_numpy()
        X = scaler.fit_transform(X_nsf)

    y = dataset.labels.ravel()
    predictions = model.predict(X)

    dataset_pred = dataset.copy()
    # reshape returns a new (n, 1) array/view; the in-place ndarray.resize used
    # before raises ValueError when the array does not own its data or is
    # referenced elsewhere.
    dataset_pred.labels = predictions.reshape(-1, 1)

    # apply the Equalized Odds post-processing method
    # NOTE(review): the post-processor is fitted on the same dataset (and true
    # labels) it is then scored on - confirm this is the intended protocol.
    eop = EqOddsPostprocessing(privileged_groups=privileged_groups,
                               unprivileged_groups=unprivileged_groups,
                               seed=0)
    eop = eop.fit(dataset, dataset_pred)
    transformed_dataset_pred = eop.predict(dataset_pred)

    accuracy = accuracy_score(y, transformed_dataset_pred.labels.ravel())
    fairness_metrics = ClassificationMetric(dataset, transformed_dataset_pred,
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
    eq_opp_diff = fairness_metrics.equal_opportunity_difference()
    avr_odds_diff = fairness_metrics.average_odds_difference()
    return {'accuracy': accuracy, 'eq_opp_diff': eq_opp_diff, 'avr_odds_diff': avr_odds_diff}

def cross_validate_search_w_eop(dataset, 
                   estimators, 
                   params, 
                   fold_num, 
                   unprivileged_groups, 
                   privileged_groups,
                   sensitive_attr=None,
                   visualization=True):
    """
    Perform multi-model cross validation and hyperparam search with fold_num number of folds

    Same procedure as the plain search (models are trained with
    ``train_model``), but each held-out fold is scored with ``evaluate_w_eop``,
    i.e. after Equalized Odds post-processing.

    Args:
        dataset: dataset exposing ``features`` and ``subset(indices)``.
        estimators: mapping with ``'lr'`` and ``'svm'`` estimator factories.
        params: ``{'lr': {'lr_C': [...]}, 'svm': {'svm_C': [...]}}``.
        fold_num: number of cross-validation folds.
        unprivileged_groups: forwarded to ``evaluate_w_eop``.
        privileged_groups: forwarded to ``evaluate_w_eop``.
        sensitive_attr: column name(s) to drop from the features, or ``None``.
        visualization: when true, plot the results via ``cv_results_vis``.

    Returns:
        tuple of two DataFrames indexed by model label (``lr_C_<C>`` /
        ``svm_C_<C>``) with columns ``accuracy`` and ``eq_opp_diff``: the first
        sorted by accuracy (descending), the second by ``|eq_opp_diff|``
        (ascending).
    """
    idx_iters = get_fold_idx_iterables(dataset, fold_num)
    lr_C_vals = params['lr']['lr_C']
    svm_C_vals = params['svm']['svm_C']

    fold_accuracies = []
    fold_eq_opp_diffs = []
    for test_id in range(fold_num):
        test_idx_lst = idx_iters[test_id]
        train_idx_lst = [idx
                         for fold_id, fold in enumerate(idx_iters) if fold_id != test_id
                         for idx in fold]

        # now can get the test and train dataset under this fold
        test_dataset = dataset.subset(test_idx_lst)
        train_dataset = dataset.subset(train_idx_lst)

        # fresh models for this fold, lr first and svm second
        model_lst = (
            [estimators['lr'](C=C_val, solver=SOLVER, random_state=1) for C_val in lr_C_vals]
            + [estimators['svm'](C=C_val, max_iter=MAX_ITER, random_state=1) for C_val in svm_C_vals]
        )

        # sensitive_attr=None is the callee's default, so no branching is needed
        trained_model_lst = [train_model(model, train_dataset, sensitive_attr=sensitive_attr)
                             for model in model_lst]

        # Evaluate each model once and read both metrics from the same result
        # (previously every model was post-processed and evaluated twice per fold).
        fold_metrics = [evaluate_w_eop(model, test_dataset, unprivileged_groups, privileged_groups,
                                       sensitive_attr=sensitive_attr)
                        for model in trained_model_lst]
        fold_accuracies.append([metrics['accuracy'] for metrics in fold_metrics])
        fold_eq_opp_diffs.append([metrics['eq_opp_diff'] for metrics in fold_metrics])

    # get the average results from all folds
    accuracy_array = np.sum(np.array(fold_accuracies), axis=0) / fold_num
    eq_opp_diff_array = np.sum(np.array(fold_eq_opp_diffs), axis=0) / fold_num

    # Build the labels in the same lr-then-svm order as the score arrays rather
    # than relying on the iteration order of `params`.
    info_lst = ([f"lr_C_{np.round(C, 4)}" for C in lr_C_vals]
                + [f"svm_C_{np.round(C, 4)}" for C in svm_C_vals])

    results = [[model, accuracy, eq_opp_diff]
               for model, accuracy, eq_opp_diff in zip(info_lst, accuracy_array, eq_opp_diff_array)]

    # plot the C_vals against the acc and fairness results, for all models
    # NOTE(review): tasknum=1 makes these plots overwrite the files written by
    # cross_validate_search (task_1_model_selection.png) - confirm the intended number.
    if visualization:
        cv_results_vis(results, lr_C_vals, 1)

    results_df = pd.DataFrame(results, columns=['model', 'accuracy', 'eq_opp_diff']).set_index('model')
    return results_df.sort_values(by='accuracy', ascending=False), \
           results_df.sort_values(by='eq_opp_diff', key=lambda col: col.abs())



In [None]:
@ignore_warnings(category=ConvergenceWarning)
def full_run_for_ds(rseeds,
                    privileged_groups,
                    unprivileged_groups,
                    dataset_orig,
                    estimators, 
                    parameters, 
                    cv_fold_num, 
                    save_results=False):
    """for a specified dataset, runs the entirety of the 3 tasks' pipelines and repeat for 5 times,
       each time with an initial split controlled by the random seed from the rseeds list
    """
    
    final_results = [['repeats',
                      'model_1_accuracy',
                      'model_1_eq_opp_diff',
                      'model_2_accuracy',
                      'model_2_eq_opp_diff',
                      'model_3_accuracy',
                      'model_3_eq_opp_diff',
                      'model_4_accuracy',
                      'model_4_eq_opp_diff',
                      'model_5_accuracy',
                      'model_5_eq_opp_diff',
                      'model_6_accuracy',
                      'model_6_eq_opp_diff']]

    for i, rseed in enumerate(rseeds):
        repeat_num = i + 1

        results_per_repeat = [f"Repeat_{repeat_num}"]

        # set the random seed
        np.random.seed(rseed)

        # task 1: first shuffle and split the whole dataset into train-test
        train, test = dataset_orig.split([0.7], shuffle=True)
        print('\nTotal number of training examples:', len(train.features))


        
        task_1_results_dataframes = cross_validate_search(train, 
                      estimators, 
                      parameters, 
                      cv_fold_num, 
                      unprivileged_groups, 
                      privileged_groups,
                      visualization=True)

        print(f"\nAt repeat number {repeat_num}, the task 1 cv results are:\n\n", task_1_results_dataframes[0], '\n\n', task_1_results_dataframes[1])
        
        
        # test the selected models on held-out test dataset
        # best model based on accuracy(model 1):
        acc_model = task_1_results_dataframes[0].reset_index().iloc[0][0].split('_')
        if acc_model[0] == 'lr':
            # logistic regression model
            acc_model_type = 'lr'
            acc_C_best = float(acc_model[-1])
            model_1 = LogisticRegression(C=acc_C_best, solver=SOLVER, random_state=1)
        elif acc_model[0] == 'svm':
            # svm model
            acc_model_type = 'svm'
            acc_C_best = float(acc_model[-1])
            model_1 = svm.LinearSVC(C=acc_C_best, max_iter=MAX_ITER, random_state=1)

        # best model based on fairness(model 2):
        fair_model = task_1_results_dataframes[1].reset_index().iloc[0][0].split('_')
        if fair_model[0] == 'lr':
            # logistic regression model
            fair_model_type = 'lr'
            fair_C_best = float(fair_model[-1])
            model_2 = LogisticRegression(C=fair_C_best, solver=SOLVER, random_state=1)
        elif fair_model[0] == 'svm':
            # svm model
            fair_model_type = 'svm'
            fair_C_best = float(fair_model[-1])
            model_2 = svm.LinearSVC(C=fair_C_best, max_iter=MAX_ITER, random_state=1)

        # train the selected models
        model_1_trained = train_model(model_1, train)
        model_2_trained = train_model(model_2, train)

        model_1_results = evaluate(model_1_trained, test, unprivileged_groups, privileged_groups)
        model_2_results = evaluate(model_2_trained, test, unprivileged_groups, privileged_groups)

        results_per_repeat.append(model_1_results['accuracy'])
        results_per_repeat.append(model_1_results['eq_opp_diff'])
        results_per_repeat.append(model_2_results['accuracy'])
        results_per_repeat.append(model_2_results['eq_opp_diff'])

        results = [['model', 'accuracy', 'eq_opp_diff']]
        # test the selected models
        results.append([f'model_1: {acc_model_type}_C_{acc_C_best}',
                        model_1_results['accuracy'],
                        model_1_results['eq_opp_diff']
                      ])
        results.append([f'model_2: {fair_model_type}_C_{fair_C_best}',
                        model_2_results['accuracy'],
                        model_2_results['eq_opp_diff']
                      ])

        print(f"\nAt repeat number {repeat_num}, the test results for model_1 & model_2 are: \n\n", pd.DataFrame(results[1:], columns=results[0]).set_index('model'))
        if save_results is True:
            pd.DataFrame(results[1:], columns=results[0]).set_index('model').to_csv(f"repeat_{repeat_num}_task_1_results.csv")


        # task 2 with reweighing
        task_2_results_dataframes = cross_validate_search_w_reweigh(train, 
               estimators, 
               parameters, 
               cv_fold_num, 
               unprivileged_groups, 
               privileged_groups,
               visualization=True)

        print(f"\nAt repeat number {repeat_num}, the task 2 cv results are:\n\n", task_2_results_dataframes[0], '\n\n', task_2_results_dataframes[1])
        
        # test the selected models from task 2 on held-out test dataset
        # best model based on accuracy(model 3):
        acc_model = task_2_results_dataframes[0].reset_index().iloc[0][0].split('_')
        if acc_model[0] == 'lr':
            # logistic regression model
            acc_model_type = 'lr'
            acc_C_best = float(acc_model[-1])
            model_3 = LogisticRegression(C=acc_C_best, solver=SOLVER, random_state=1)
        elif acc_model[0] == 'svm':
            # svm model
            acc_model_type = 'svm'
            acc_C_best = float(acc_model[-1])
            model_3 = svm.LinearSVC(C=acc_C_best, max_iter=MAX_ITER, random_state=1)

        # best model based on fairness(model 4):
        fair_model = task_2_results_dataframes[1].reset_index().iloc[0][0].split('_')
        if fair_model[0] == 'lr':
            # logistic regression model
            fair_model_type = 'lr'
            fair_C_best = float(fair_model[-1])
            model_4 = LogisticRegression(C=fair_C_best, solver=SOLVER, random_state=1)
        elif fair_model[0] == 'svm':
            # svm model
            fair_model_type = 'svm'
            fair_C_best = float(fair_model[-1])
            model_4 = svm.LinearSVC(C=fair_C_best, max_iter=MAX_ITER, random_state=1)

        # train the selected models
        model_3_trained = train_model_w_reweigh(model_3, train, unprivileged_groups, privileged_groups)
        model_4_trained = train_model_w_reweigh(model_4, train, unprivileged_groups, privileged_groups)

        model_3_results = evaluate_w_reweigh(model_3_trained, test, unprivileged_groups, privileged_groups)
        model_4_results = evaluate_w_reweigh(model_4_trained, test, unprivileged_groups, privileged_groups)

        results_per_repeat.append(model_3_results['accuracy'])
        results_per_repeat.append(model_3_results['eq_opp_diff'])
        results_per_repeat.append(model_4_results['accuracy'])
        results_per_repeat.append(model_4_results['eq_opp_diff'])

        results = [['model', 'accuracy', 'eq_opp_diff']]
        # test the selected models
        results.append([f'model_3: {acc_model_type}_C_{acc_C_best}',
                        model_3_results['accuracy'],
                        model_3_results['eq_opp_diff']
                      ])
        results.append([f'model_4: {fair_model_type}_C_{fair_C_best}',
                        model_4_results['accuracy'],
                        model_4_results['eq_opp_diff']
                      ])

        print(f"\nAt repeat number {repeat_num}, the test results for model_3 & model_4 are: \n\n", pd.DataFrame(results[1:], columns=results[0]).set_index('model'))
        if save_results is True:
            pd.DataFrame(results[1:], columns=results[0]).set_index('model').to_csv(f"repeat_{repeat_num}_task_2_results.csv")
        
        
        # Task 3: model selection based on both accuracy and fairness
        
        # first for the fairness method(reweighing from task2) based model(model 5)
        combined_fairness_df = task_2_results_dataframes[0]
        combined_fairness_df['combinedmetric'] = combined_metric(combined_fairness_df['accuracy'], 
                                                                 abs(combined_fairness_df['eq_opp_diff']))
        combined_fairness_df = combined_fairness_df.sort_values(by='combinedmetric', ascending=False)
        
        # select the model 5 as the best model based on combinedmetric result using task 2 method
        fair_model = combined_fairness_df.reset_index().iloc[0][0].split('_')
        if fair_model[0] == 'lr':
            # logistic regression model
            fair_model_type = 'lr'
            fair_C_best = float(fair_model[-1])
            model_5 = LogisticRegression(C=fair_C_best, solver=SOLVER, random_state=1)
        elif fair_model[0] == 'svm':
            # svm model
            fair_model_type = 'svm'
            fair_C_best = float(fair_model[-1])
            model_5 = svm.LinearSVC(C=fair_C_best, max_iter=MAX_ITER, random_state=1)
        
        # train and test the model 5
        model_5_trained = train_model_w_reweigh(model_5, train, unprivileged_groups, privileged_groups)
        model_5_results = evaluate_w_reweigh(model_5_trained, test, unprivileged_groups, privileged_groups)
        
        results_per_repeat.append(model_5_results['accuracy'])
        results_per_repeat.append(model_5_results['eq_opp_diff'])
        
        # now for the standard method model(from task 1)
        combined_standard_df = task_1_results_dataframes[0]
        combined_standard_df['combinedmetric'] = combined_metric(combined_standard_df['accuracy'], 
                                                                 abs(combined_standard_df['eq_opp_diff']))
        combined_standard_df = combined_standard_df.sort_values(by='combinedmetric', ascending=False)
        
        # select the model 6 as the best model based on combinedmetric result using task 1 method
        acc_model = combined_standard_df.reset_index().iloc[0][0].split('_')
        if acc_model[0] == 'lr':
            # logistic regression model
            acc_model_type = 'lr'
            acc_C_best = float(acc_model[-1])
            model_6 = LogisticRegression(C=acc_C_best, solver=SOLVER, random_state=1)
        elif acc_model[0] == 'svm':
            # svm model
            acc_model_type = 'svm'
            acc_C_best = float(acc_model[-1])
            model_6 = svm.LinearSVC(C=acc_C_best, max_iter=MAX_ITER, random_state=1)
        
        # train and test the model 6
        model_6_trained = train_model(model_6, train)
        model_6_results = evaluate(model_6_trained, test, unprivileged_groups, privileged_groups)
        
        results_per_repeat.append(model_6_results['accuracy'])
        results_per_repeat.append(model_6_results['eq_opp_diff'])
        
        print(combined_fairness_df)
        print()
        print(combined_standard_df)
        
        results = [['model', 'accuracy', 'eq_opp_diff']]
        # test the selected models
        results.append([f'model_5: {fair_model_type}_C_{fair_C_best}',
                        model_5_results['accuracy'],
                        model_5_results['eq_opp_diff']
                      ])
        results.append([f'model_6: {acc_model_type}_C_{acc_C_best}',
                        model_6_results['accuracy'],
                        model_6_results['eq_opp_diff']
                      ])

        print(f"\nAt repeat number {repeat_num}, the test results for model_5 & model_6 are: \n\n", pd.DataFrame(results[1:], columns=results[0]).set_index('model'))
        if save_results is True:
            pd.DataFrame(results[1:], columns=results[0]).set_index('model').to_csv(f"repeat_{repeat_num}_task_3_results.csv")
        
        
        
        # finished the three tasks
        final_results.append(results_per_repeat)
        
    
    return pd.DataFrame(final_results[1:], columns=final_results[0]).set_index('repeats')



In [None]:
@ignore_warnings(category=ConvergenceWarning)
def full_run_for_ds_w_eop(rseeds,
                    privileged_groups,
                    unprivileged_groups,
                    dataset_orig,
                    estimators,
                    parameters,
                    cv_fold_num,
                    save_results=False):
    """Run the three tasks' pipelines once per seed, with Equalized Odds as the task 2 method.

    For every seed in ``rseeds`` the numpy global RNG is re-seeded, ``dataset_orig``
    is shuffled and split 70/30 into train/test, and six models are selected,
    trained on the train split and scored on the test split:

    * model 1 / 2 -- top row of the first / second table returned by
      ``cross_validate_search`` (used as the accuracy-ranked / fairness-ranked
      tables; the ranking itself happens inside that helper).
    * model 3 / 4 -- same selection, from ``cross_validate_search_w_eop``;
      trained with ``train_model`` and scored with ``evaluate_w_eop``.
    * model 5 / 6 -- top row after re-ranking the first task 2 / task 1 table
      by ``combined_metric(accuracy, |eq_opp_diff|)``, descending.

    Parameters
    ----------
    rseeds : iterable of int
        One repeat is run per seed (the number of repeats is ``len(rseeds)``).
    privileged_groups, unprivileged_groups : list of dict
        Group definitions, forwarded unchanged to the CV and evaluation helpers.
    dataset_orig : object exposing ``split([0.7], shuffle=True)``
        The full dataset; each split part must expose ``.features``.
    estimators, parameters :
        Forwarded unchanged to the cross-validation helpers.
    cv_fold_num : int
        Forwarded unchanged to the cross-validation helpers.
    save_results : bool, default False
        When exactly ``True``, each task's two-row test table is written to
        ``repeat_{n}_task_{k}_results.csv`` in the current working directory.
        NOTE: the file name carries no method or dataset tag, so any other run
        writing the same names into the same directory overwrites these files.

    Returns
    -------
    pandas.DataFrame
        One row per repeat, indexed by ``'repeats'`` (``Repeat_1``, ...), with
        columns ``model_{k}_accuracy`` and ``model_{k}_eq_opp_diff`` for k = 1..6.

    Raises
    ------
    ValueError
        If a selected model name does not start with ``'lr'`` or ``'svm'``.
    """

    def _select_model(ranked_results):
        """Build the estimator named by the first row's index label of ``ranked_results``.

        The label is split on '_': the first piece is the estimator type and the
        last piece is parsed as C. Returns ``(model, model_type, C_best)``.
        """
        # .iloc[0, 0] is purely positional; the former .iloc[0][0] relied on the
        # deprecated integer fallback of Series.__getitem__ on a labelled Series.
        model_name = ranked_results.reset_index().iloc[0, 0]
        name_parts = model_name.split('_')
        model_type = name_parts[0]
        if model_type not in ('lr', 'svm'):
            # Previously this fell through and left the model variable undefined
            # (or stale from the previous repeat).
            raise ValueError(
                f"Unrecognised model name {model_name!r}: expected an 'lr' or 'svm' prefix")
        C_best = float(name_parts[-1])
        if model_type == 'lr':
            model = LogisticRegression(C=C_best, solver=SOLVER, random_state=1)
        else:
            model = svm.LinearSVC(C=C_best, max_iter=MAX_ITER, random_state=1)
        return model, model_type, C_best

    def _rank_by_combined_metric(cv_results):
        """Return a copy of ``cv_results`` with a 'combinedmetric' column, best first."""
        # .assign keeps the caller's CV table untouched instead of adding a column in place.
        scored = cv_results.assign(
            combinedmetric=combined_metric(cv_results['accuracy'],
                                           abs(cv_results['eq_opp_diff'])))
        return scored.sort_values(by='combinedmetric', ascending=False)

    def _report(repeat_num, task_num, entries):
        """Print (and optionally save) the test table for one task.

        ``entries`` is a list of ``(model_number, model_type, C_best, metrics)``.
        """
        table = pd.DataFrame(
            [[f'model_{num}: {model_type}_C_{C_best}',
              metrics['accuracy'],
              metrics['eq_opp_diff']]
             for num, model_type, C_best, metrics in entries],
            columns=['model', 'accuracy', 'eq_opp_diff']).set_index('model')
        model_names = ' & '.join(f'model_{entry[0]}' for entry in entries)
        print(f"\nAt repeat number {repeat_num}, the test results for {model_names} are: \n\n", table)
        if save_results is True:
            table.to_csv(f"repeat_{repeat_num}_task_{task_num}_results.csv")

    result_columns = ['repeats'] + [f'model_{k}_{metric}'
                                    for k in range(1, 7)
                                    for metric in ('accuracy', 'eq_opp_diff')]
    final_results = []

    for repeat_num, rseed in enumerate(rseeds, start=1):
        results_per_repeat = [f"Repeat_{repeat_num}"]

        # set the random seed (controls the shuffle in the split below)
        np.random.seed(rseed)

        # task 1: first shuffle and split the whole dataset into train-test
        train, test = dataset_orig.split([0.7], shuffle=True)
        print('\nTotal number of training examples:', len(train.features))

        task_1_results_dataframes = cross_validate_search(train,
                      estimators,
                      parameters,
                      cv_fold_num,
                      unprivileged_groups,
                      privileged_groups,
                      visualization=False)

        print(f"\nAt repeat number {repeat_num}, the task 1 cv results are:\n\n", task_1_results_dataframes[0], '\n\n', task_1_results_dataframes[1])

        # best model based on accuracy (model 1) and on fairness (model 2)
        model_1, acc_model_type, acc_C_best = _select_model(task_1_results_dataframes[0])
        model_2, fair_model_type, fair_C_best = _select_model(task_1_results_dataframes[1])

        # train the selected models, then test them on the held-out test dataset
        model_1_trained = train_model(model_1, train)
        model_2_trained = train_model(model_2, train)

        model_1_results = evaluate(model_1_trained, test, unprivileged_groups, privileged_groups)
        model_2_results = evaluate(model_2_trained, test, unprivileged_groups, privileged_groups)

        results_per_repeat += [model_1_results['accuracy'], model_1_results['eq_opp_diff'],
                               model_2_results['accuracy'], model_2_results['eq_opp_diff']]

        _report(repeat_num, 1, [(1, acc_model_type, acc_C_best, model_1_results),
                                (2, fair_model_type, fair_C_best, model_2_results)])

        # task 2: with Equalized Odds
        task_2_results_dataframes = cross_validate_search_w_eop(train,
                       estimators,
                       parameters,
                       cv_fold_num,
                       unprivileged_groups,
                       privileged_groups,
                       visualization=False)

        print(f"\nAt repeat number {repeat_num}, the task 2 cv results are:\n\n", task_2_results_dataframes[0], '\n\n', task_2_results_dataframes[1])

        # best model based on accuracy (model 3) and on fairness (model 4)
        model_3, acc_model_type, acc_C_best = _select_model(task_2_results_dataframes[0])
        model_4, fair_model_type, fair_C_best = _select_model(task_2_results_dataframes[1])

        # training is the plain train_model; the Equalized Odds step is in evaluate_w_eop
        model_3_trained = train_model(model_3, train)
        model_4_trained = train_model(model_4, train)

        model_3_results = evaluate_w_eop(model_3_trained, test, unprivileged_groups, privileged_groups)
        model_4_results = evaluate_w_eop(model_4_trained, test, unprivileged_groups, privileged_groups)

        results_per_repeat += [model_3_results['accuracy'], model_3_results['eq_opp_diff'],
                               model_4_results['accuracy'], model_4_results['eq_opp_diff']]

        _report(repeat_num, 2, [(3, acc_model_type, acc_C_best, model_3_results),
                                (4, fair_model_type, fair_C_best, model_4_results)])

        # Task 3: model selection based on both accuracy and fairness
        # first for the fairness method (Equalized Odds from task 2) based model (model 5)
        combined_fairness_df = _rank_by_combined_metric(task_2_results_dataframes[0])
        model_5, fair_model_type, fair_C_best = _select_model(combined_fairness_df)

        # train and test the model 5
        model_5_trained = train_model(model_5, train)
        model_5_results = evaluate_w_eop(model_5_trained, test, unprivileged_groups, privileged_groups)

        results_per_repeat += [model_5_results['accuracy'], model_5_results['eq_opp_diff']]

        # now for the standard method model (from task 1): model 6
        combined_standard_df = _rank_by_combined_metric(task_1_results_dataframes[0])
        model_6, acc_model_type, acc_C_best = _select_model(combined_standard_df)

        # train and test the model 6
        model_6_trained = train_model(model_6, train)
        model_6_results = evaluate(model_6_trained, test, unprivileged_groups, privileged_groups)

        results_per_repeat += [model_6_results['accuracy'], model_6_results['eq_opp_diff']]

        print(combined_fairness_df)
        print()
        print(combined_standard_df)

        _report(repeat_num, 3, [(5, fair_model_type, fair_C_best, model_5_results),
                                (6, acc_model_type, acc_C_best, model_6_results)])

        # finished the three tasks
        final_results.append(results_per_repeat)

    return pd.DataFrame(final_results, columns=result_columns).set_index('repeats')

In [None]:
# select dataset
dataset_used = "adult"
# dataset_used = "german"

if dataset_used == "adult":
    privileged_groups = [{'sex': 1}]
    unprivileged_groups = [{'sex': 0}]
    dataset_orig = load_preproc_data_adult(['sex'])
elif dataset_used == "german":
    privileged_groups = [{'age': 1}]
    unprivileged_groups = [{'age': 0}]
    dataset_orig = load_preproc_data_german(['age'])
else:
    # Fail loudly: otherwise a typo would leave the groups/dataset undefined, or
    # silently reuse whatever an earlier cell execution left in the kernel.
    raise ValueError(f"Unknown dataset_used {dataset_used!r}: expected 'adult' or 'german'")


# define all the model types to be used and specify the hyperparam(C)'s search range
estimators = {'lr':LogisticRegression, 'svm':svm.LinearSVC}

# Candidate values of C: 1e-4, 1e-3, ..., 1e1 (six log-spaced points).
# Earlier search ranges, kept for reference:
# C_search_range = np.arange(0.1, 0.5, 0.1)
# C_search_range = [0.001, 0.01, 0.1, 1]
C_search_range = np.logspace(-4, 1, 6)

parameters = OrderedDict(
    [('lr', {'lr_C':C_search_range}),
    ('svm', {'svm_C':C_search_range})]
    )

# number of cross-validation folds handed to the pipeline
CV_FOLD_NUM = 4

# one repeat is run per seed; alternative seed sets kept for reference
# rseeds = [0, 99, 199, 299, 999]
rseeds = [1, 222, 444, 888, 248]  # final3
# rseeds = [4, 5, 6, 7, 8] # final4_german
ds_results = full_run_for_ds(rseeds,
                             privileged_groups,
                             unprivileged_groups,
                             dataset_orig,
                             estimators,
                             parameters,
                             CV_FOLD_NUM,
                             save_results=True)

In [None]:
# Show the per-repeat table, then save its column-wise mean and standard
# deviation as a two-row summary (rows 'mean' and 'std').
# (optional full dump: ds_results.to_csv('final3_full.csv'))
print(ds_results)
mean = ds_results.mean().rename('mean')
std = ds_results.std().rename('std')
final_stats = pd.concat([mean, std], axis=1).T
final_stats.to_csv('result3.csv')

In [None]:
# select dataset
dataset_used = "adult"
# dataset_used = "german"

if dataset_used == "adult":
    privileged_groups = [{'sex': 1}]
    unprivileged_groups = [{'sex': 0}]
    dataset_orig = load_preproc_data_adult(['sex'])
elif dataset_used == "german":
    privileged_groups = [{'age': 1}]
    unprivileged_groups = [{'age': 0}]
    dataset_orig = load_preproc_data_german(['age'])
else:
    # Fail loudly: otherwise a typo would leave the groups/dataset undefined, or
    # silently reuse whatever an earlier cell execution left in the kernel.
    raise ValueError(f"Unknown dataset_used {dataset_used!r}: expected 'adult' or 'german'")


# define all the model types to be used and specify the hyperparam(C)'s search range
estimators = {'lr':LogisticRegression, 'svm':svm.LinearSVC}

# Candidate values of C: 1e-4, 1e-3, ..., 1e1 (six log-spaced points).
# Earlier search ranges, kept for reference:
# C_search_range = np.arange(0.1, 0.5, 0.1)
# C_search_range = [0.001, 0.01, 0.1, 1]
C_search_range = np.logspace(-4, 1, 6)

parameters = OrderedDict(
    [('lr', {'lr_C':C_search_range}),
    ('svm', {'svm_C':C_search_range})]
    )

# number of cross-validation folds handed to the pipeline
CV_FOLD_NUM = 4

# one repeat is run per seed; alternative seed sets kept for reference
# rseeds = [0, 99, 199, 299, 999]
rseeds = [1, 222, 444, 888, 248]  # final3
# rseeds = [4, 5, 6, 7, 8] # final4_german
ds_eop_results = full_run_for_ds_w_eop(rseeds,
                             privileged_groups,
                             unprivileged_groups,
                             dataset_orig,
                             estimators,
                             parameters,
                             CV_FOLD_NUM,
                             save_results=True)

In [None]:
# Persist and show the per-repeat Equalized Odds table, then save and display
# its column-wise mean and standard deviation (rows 'mean' and 'std').
# (german-run variant: ds_results.to_csv('final4_german.csv'))
ds_eop_results.to_csv('final3_eop4.csv')
print(ds_eop_results)
mean = ds_eop_results.mean().rename('mean')
std = ds_eop_results.std().rename('std')
final_stats = pd.concat([mean, std], axis=1).T
final_stats.to_csv('final3_eop.csv')
final_stats

In [None]:
@ignore_warnings(category=ConvergenceWarning)
def full_run_for_ds_nsf(rseeds,
                    privileged_groups,
                    unprivileged_groups,
                    sensitive_attr,
                    dataset_orig,
                    estimators,
                    parameters,
                    cv_fold_num,
                    save_results=False):
    """Run the three tasks' pipelines once per seed, passing ``sensitive_attr`` to every helper.

    "nsf" stands for "no sensitive feature": ``sensitive_attr`` is forwarded to
    every cross-validation, training and evaluation helper so that they can
    exclude that attribute from the input features X (the exclusion itself is
    implemented in those helpers, not here -- confirm there).

    For every seed in ``rseeds`` the numpy global RNG is re-seeded, ``dataset_orig``
    is shuffled and split 70/30 into train/test, and six models are selected,
    trained on the train split and scored on the test split:

    * model 1 / 2 -- top row of the first / second table returned by
      ``cross_validate_search`` (used as the accuracy-ranked / fairness-ranked
      tables; the ranking itself happens inside that helper).
    * model 3 / 4 -- same selection, from ``cross_validate_search_w_reweigh``;
      trained with ``train_model_w_reweigh`` and scored with ``evaluate_w_reweigh``.
    * model 5 / 6 -- top row after re-ranking the first task 2 / task 1 table
      by ``combined_metric(accuracy, |eq_opp_diff|)``, descending.

    Parameters
    ----------
    rseeds : iterable of int
        One repeat is run per seed (the number of repeats is ``len(rseeds)``).
    privileged_groups, unprivileged_groups : list of dict
        Group definitions, forwarded unchanged to the helpers.
    sensitive_attr :
        Forwarded as the ``sensitive_attr`` keyword to every helper call.
    dataset_orig : object exposing ``split([0.7], shuffle=True)``
        The full dataset; each split part must expose ``.features``.
    estimators, parameters :
        Forwarded unchanged to the cross-validation helpers.
    cv_fold_num : int
        Forwarded unchanged to the cross-validation helpers.
    save_results : bool, default False
        When exactly ``True``, each task's two-row test table is written to
        ``repeat_{n}_task_{k}_results.csv`` in the current working directory.
        NOTE: the file name carries no method or dataset tag, so any other run
        writing the same names into the same directory overwrites these files.

    Returns
    -------
    pandas.DataFrame
        One row per repeat, indexed by ``'repeats'`` (``Repeat_1``, ...), with
        columns ``model_{k}_accuracy`` and ``model_{k}_eq_opp_diff`` for k = 1..6.

    Raises
    ------
    ValueError
        If a selected model name does not start with ``'lr'`` or ``'svm'``.
    """

    def _select_model(ranked_results):
        """Build the estimator named by the first row's index label of ``ranked_results``.

        The label is split on '_': the first piece is the estimator type and the
        last piece is parsed as C. Returns ``(model, model_type, C_best)``.
        """
        # .iloc[0, 0] is purely positional; the former .iloc[0][0] relied on the
        # deprecated integer fallback of Series.__getitem__ on a labelled Series.
        model_name = ranked_results.reset_index().iloc[0, 0]
        name_parts = model_name.split('_')
        model_type = name_parts[0]
        if model_type not in ('lr', 'svm'):
            # Previously this fell through and left the model variable undefined
            # (or stale from the previous repeat).
            raise ValueError(
                f"Unrecognised model name {model_name!r}: expected an 'lr' or 'svm' prefix")
        C_best = float(name_parts[-1])
        if model_type == 'lr':
            model = LogisticRegression(C=C_best, solver=SOLVER, random_state=1)
        else:
            model = svm.LinearSVC(C=C_best, max_iter=MAX_ITER, random_state=1)
        return model, model_type, C_best

    def _rank_by_combined_metric(cv_results):
        """Return a copy of ``cv_results`` with a 'combinedmetric' column, best first."""
        # .assign keeps the caller's CV table untouched instead of adding a column in place.
        scored = cv_results.assign(
            combinedmetric=combined_metric(cv_results['accuracy'],
                                           abs(cv_results['eq_opp_diff'])))
        return scored.sort_values(by='combinedmetric', ascending=False)

    def _report(repeat_num, task_num, entries):
        """Print (and optionally save) the test table for one task.

        ``entries`` is a list of ``(model_number, model_type, C_best, metrics)``.
        """
        table = pd.DataFrame(
            [[f'model_{num}: {model_type}_C_{C_best}',
              metrics['accuracy'],
              metrics['eq_opp_diff']]
             for num, model_type, C_best, metrics in entries],
            columns=['model', 'accuracy', 'eq_opp_diff']).set_index('model')
        model_names = ' & '.join(f'model_{entry[0]}' for entry in entries)
        print(f"\nAt repeat number {repeat_num}, the test results for {model_names} are: \n\n", table)
        if save_results is True:
            table.to_csv(f"repeat_{repeat_num}_task_{task_num}_results.csv")

    result_columns = ['repeats'] + [f'model_{k}_{metric}'
                                    for k in range(1, 7)
                                    for metric in ('accuracy', 'eq_opp_diff')]
    final_results = []

    for repeat_num, rseed in enumerate(rseeds, start=1):
        results_per_repeat = [f"Repeat_{repeat_num}"]

        # set the random seed (controls the shuffle in the split below)
        np.random.seed(rseed)

        # task 1: first shuffle and split the whole dataset into train-test
        train, test = dataset_orig.split([0.7], shuffle=True)
        print('\nTotal number of training examples:', len(train.features))

        task_1_results_dataframes = cross_validate_search(train,
                      estimators,
                      parameters,
                      cv_fold_num,
                      unprivileged_groups,
                      privileged_groups,
                      sensitive_attr=sensitive_attr,
                      visualization=False)

        print(f"\nAt repeat number {repeat_num}, the task 1 cv results are:\n\n", task_1_results_dataframes[0], '\n\n', task_1_results_dataframes[1])

        # best model based on accuracy (model 1) and on fairness (model 2)
        model_1, acc_model_type, acc_C_best = _select_model(task_1_results_dataframes[0])
        model_2, fair_model_type, fair_C_best = _select_model(task_1_results_dataframes[1])

        # train the selected models, then test them on the held-out test dataset
        model_1_trained = train_model(model_1, train, sensitive_attr=sensitive_attr)
        model_2_trained = train_model(model_2, train, sensitive_attr=sensitive_attr)

        model_1_results = evaluate(model_1_trained, test, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)
        model_2_results = evaluate(model_2_trained, test, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)

        results_per_repeat += [model_1_results['accuracy'], model_1_results['eq_opp_diff'],
                               model_2_results['accuracy'], model_2_results['eq_opp_diff']]

        _report(repeat_num, 1, [(1, acc_model_type, acc_C_best, model_1_results),
                                (2, fair_model_type, fair_C_best, model_2_results)])

        # task 2: with reweighing
        task_2_results_dataframes = cross_validate_search_w_reweigh(train,
                       estimators,
                       parameters,
                       cv_fold_num,
                       unprivileged_groups,
                       privileged_groups,
                       sensitive_attr=sensitive_attr,
                       visualization=False)

        print(f"\nAt repeat number {repeat_num}, the task 2 cv results are:\n\n", task_2_results_dataframes[0], '\n\n', task_2_results_dataframes[1])

        # best model based on accuracy (model 3) and on fairness (model 4)
        model_3, acc_model_type, acc_C_best = _select_model(task_2_results_dataframes[0])
        model_4, fair_model_type, fair_C_best = _select_model(task_2_results_dataframes[1])

        # train the selected models, then test them on the held-out test dataset
        model_3_trained = train_model_w_reweigh(model_3, train, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)
        model_4_trained = train_model_w_reweigh(model_4, train, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)

        model_3_results = evaluate_w_reweigh(model_3_trained, test, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)
        model_4_results = evaluate_w_reweigh(model_4_trained, test, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)

        results_per_repeat += [model_3_results['accuracy'], model_3_results['eq_opp_diff'],
                               model_4_results['accuracy'], model_4_results['eq_opp_diff']]

        _report(repeat_num, 2, [(3, acc_model_type, acc_C_best, model_3_results),
                                (4, fair_model_type, fair_C_best, model_4_results)])

        # Task 3: model selection based on both accuracy and fairness
        # first for the fairness method (reweighing from task 2) based model (model 5)
        combined_fairness_df = _rank_by_combined_metric(task_2_results_dataframes[0])
        model_5, fair_model_type, fair_C_best = _select_model(combined_fairness_df)

        # train and test the model 5
        model_5_trained = train_model_w_reweigh(model_5, train, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)
        model_5_results = evaluate_w_reweigh(model_5_trained, test, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)

        results_per_repeat += [model_5_results['accuracy'], model_5_results['eq_opp_diff']]

        # now for the standard method model (from task 1): model 6
        combined_standard_df = _rank_by_combined_metric(task_1_results_dataframes[0])
        model_6, acc_model_type, acc_C_best = _select_model(combined_standard_df)

        # train and test the model 6
        model_6_trained = train_model(model_6, train, sensitive_attr=sensitive_attr)
        model_6_results = evaluate(model_6_trained, test, unprivileged_groups, privileged_groups, sensitive_attr=sensitive_attr)

        results_per_repeat += [model_6_results['accuracy'], model_6_results['eq_opp_diff']]

        print(combined_fairness_df)
        print()
        print(combined_standard_df)

        _report(repeat_num, 3, [(5, fair_model_type, fair_C_best, model_5_results),
                                (6, acc_model_type, acc_C_best, model_6_results)])

        # finished the three tasks
        final_results.append(results_per_repeat)

    return pd.DataFrame(final_results, columns=result_columns).set_index('repeats')


In [23]:
# Select the dataset for this run. Supported values: "adult" and "german".
# (Switch by editing the assignment below.)
dataset_used = "german"

# Per-dataset protected attribute and group definitions, plus the
# pre-processed AIF360 dataset itself.
if dataset_used == "adult":
    privileged_groups = [{'sex': 1}]
    unprivileged_groups = [{'sex': 0}]
    sensitive_attr = ['sex']
    dataset_orig = load_preproc_data_adult(['sex'])
elif dataset_used == "german":
    privileged_groups = [{'age': 1}]
    unprivileged_groups = [{'age': 0}]
    sensitive_attr = ['age']
    dataset_orig = load_preproc_data_german(['age'])
else:
    # Fail fast: without this branch a misspelled name would skip both
    # branches and the call below would silently reuse group definitions and
    # data left in the kernel by an earlier run (or die with a NameError on a
    # fresh kernel, far from the actual mistake).
    raise ValueError(
        f"Unknown dataset_used {dataset_used!r}; expected 'adult' or 'german'"
    )


# Model families to compare, keyed by the short prefix used in result labels.
estimators = {'lr': LogisticRegression, 'svm': svm.LinearSVC}

# Search range for the regularisation hyper-parameter C:
# six log-spaced values from 1e-4 to 1e1.
C_search_range = np.logspace(-4, 1, 6)

# One hyper-parameter grid per estimator; OrderedDict keeps 'lr' before 'svm'.
parameters = OrderedDict(
    [('lr', {'lr_C': C_search_range}),
     ('svm', {'svm_C': C_search_range})]
)

# One random seed per repeat of the experiment.
# Seed sets tried previously: [0, 99, 199, 299, 999]; [1, 222, 444, 888, 248] ("final3").
rseeds = [4, 5, 6, 7, 8]  # final4_german
ds_nsf_results = full_run_for_ds_nsf(rseeds,
                                     privileged_groups,
                                     unprivileged_groups,
                                     sensitive_attr,
                                     dataset_orig,
                                     estimators,
                                     parameters,
                                     4,  # NOTE(review): presumably the number of CV folds - confirm against full_run_for_ds_nsf's signature
                                     save_results=True)


Total number of training examples: 700

At repeat number 1, the task 1 cv results are:

               accuracy  eq_opp_diff
model                              
lr_C_0.1      0.701429     0.007426
lr_C_1.0      0.701429     0.007426
lr_C_10.0     0.701429     0.007426
svm_C_0.01    0.701429     0.007426
svm_C_0.1     0.701429     0.007426
svm_C_1.0     0.701429     0.007426
svm_C_10.0    0.701429     0.007426
lr_C_0.01     0.681429    -0.050671
svm_C_0.001   0.681429    -0.050671
lr_C_0.0001   0.662857    -0.088073
lr_C_0.001    0.662857    -0.088073
svm_C_0.0001  0.662857    -0.088073 

               accuracy  eq_opp_diff
model                              
lr_C_0.1      0.701429     0.007426
lr_C_1.0      0.701429     0.007426
lr_C_10.0     0.701429     0.007426
svm_C_0.01    0.701429     0.007426
svm_C_0.1     0.701429     0.007426
svm_C_1.0     0.701429     0.007426
svm_C_10.0    0.701429     0.007426
lr_C_0.01     0.681429    -0.050671
svm_C_0.001   0.681429    -0.050671
lr_C_0.

In [24]:
# Summarise the per-repeat results returned by full_run_for_ds_nsf:
# one row per repeat, one column per model metric.
print(ds_nsf_results)

# Column-wise mean and standard deviation across repeats
# (pandas' default std, i.e. ddof=1).
mean = pd.Series(ds_nsf_results.mean(), name='mean')
std = pd.Series(ds_nsf_results.std(), name='std')

# Two-row table (index: 'mean', 'std') with the original metric columns.
final_stats = pd.concat([mean, std], axis=1).transpose()

# Take the dataset part of the file name from `dataset_used` (set in the
# dataset-selection cell) so results for one dataset are never saved under the
# other's name. The 'final4' run tag is still maintained by hand.
final_stats.to_csv(f'final4_{dataset_used}_nsf.csv')
final_stats

          model_1_accuracy  model_1_eq_opp_diff  model_2_accuracy  \
repeats                                                             
Repeat_1          0.680000            -0.034410          0.680000   
Repeat_2          0.663333             0.000000          0.663333   
Repeat_3          0.696667            -0.020502          0.696667   
Repeat_4          0.706667             0.000000          0.706667   
Repeat_5          0.713333            -0.043459          0.716667   

          model_2_eq_opp_diff  model_3_accuracy  model_3_eq_opp_diff  \
repeats                                                                
Repeat_1            -0.034410          0.680000            -0.034410   
Repeat_2             0.000000          0.663333             0.000000   
Repeat_3            -0.020502          0.696667            -0.020502   
Repeat_4             0.000000          0.703333             0.005848   
Repeat_5            -0.008692          0.716667            -0.008692   

          m

Unnamed: 0,model_1_accuracy,model_1_eq_opp_diff,model_2_accuracy,model_2_eq_opp_diff,model_3_accuracy,model_3_eq_opp_diff,model_4_accuracy,model_4_eq_opp_diff,model_5_accuracy,model_5_eq_opp_diff,model_6_accuracy,model_6_eq_opp_diff
mean,0.692,-0.019674,0.692667,-0.012721,0.692,-0.011551,0.691333,-0.011551,0.691333,-0.011551,0.692667,-0.012721
std,0.020358,0.019734,0.021266,0.014755,0.020763,0.016179,0.019805,0.016179,0.019805,0.016179,0.021266,0.014755
