# FAIR IN-PROCESSING

This notebook implements the following in-processors:
- prejudice remover
- meta fair algorithm

A further analysis of the processor outputs is performed in `code_06_inprocess3.R`.

The notebook loads the data exported in `code_00_partitinoing.ipynb` and applies in-processors. The processor predictions are exported as CSV files.

## 1. Parameters and preparations

In [1]:
##### PARAMETERS

# project root; every other directory below is derived from it
path = 'H:/Fair Credit Scoring/'

# derived sub-directories
func_path = '{}functions/'.format(path)
data_path = '{}data/'.format(path)
res_path  = '{}results/'.format(path)
out_path  = '{}output/'.format(path)

# data set to process
# one of ['bene', 'german', 'uk', 'taiwan', 'pkdd', 'gmsc', 'homecredit']
data = 'taiwan'

# partitioning: number of folds and random seed
num_folds, seed = 5, 1

In [2]:
##### IN-PROCESSOR PARAMS

# eta grid
all_eta = [
    1, 5, 15, 30,
    50, 70, 100, 150,
]

# tau grid: 0.05, 0.10, ..., 0.30 (generated from integer percentages)
all_tau = [pct / 100 for pct in range(5, 35, 5)]

In [3]:
##### PACKAGES

import sys
sys.path.append(func_path)

import pickle
import numpy as np
import pandas as pd  # explicit: `pd` is used below and must not depend on the star import
import time

from load_data import *

from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.inprocessing.meta_fair_classifier import MetaFairClassifier
from aif360.algorithms.inprocessing.celisMeta.utils import getStats
from aif360.algorithms.inprocessing import PrejudiceRemover

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MaxAbsScaler

import matplotlib.pyplot as plt

!pip install BlackBoxAuditing

Import error: No module named 'tensorflow'


## 2. Data import

In [4]:
##### RANDOM SEED

# seed NumPy's global random state with the configured seed
np.random.seed(seed = seed)

In [5]:
##### LOAD PARTITIONING

# NOTE(review): pickle.load can execute arbitrary code; only load files written by this project
test_file = data_path + 'prepared/' + data + '_orig_test.pkl'

# context manager closes the file handle as soon as the object is loaded
with open(test_file, 'rb') as handle:
    dataset_orig_test = pickle.load(handle)

# keep the first element returned by convert_to_dataframe() and report its shape
te = dataset_orig_test.convert_to_dataframe()[0]
print(te.shape)

(15000, 186)


In [6]:
##### DATA PREP

# protected attribute
protected = 'AGE'

# group definitions keyed by the protected attribute (value 1: privileged, value 0: unprivileged)
privileged_groups, unprivileged_groups = [{protected: 1}], [{protected: 0}]

## 3. Fair processing

In [7]:
##### MODELING: PREJUDICE REMOVER
#
# For every fold: load the scaled train/valid/test subsets, fit one
# PrejudiceRemover per eta in all_eta, and export the validation and test
# scores (one column per eta) as CSV files.

# timer
cv_start = time.time()

# loop through folds
for fold in range(num_folds):

    ##### LOAD DATA

    # feedback
    print('-'*30)
    print('- FOLD ' + str(fold) + '...')
    print('-'*30)

    # import data subsets; context managers close each file handle right after loading
    # NOTE(review): pickle.load can execute arbitrary code; only load files written by this project
    fold_prefix = data_path + 'prepared/' + data + '_scaled_' + str(fold)
    with open(fold_prefix + '_train.pkl', 'rb') as handle:
        data_train = pickle.load(handle)
    with open(fold_prefix + '_valid.pkl', 'rb') as handle:
        data_valid = pickle.load(handle)
    with open(fold_prefix + '_test.pkl', 'rb') as handle:
        data_test = pickle.load(handle)


    ##### MODELING

    # placeholders: one score column per eta
    pr_predictions_valid = pd.DataFrame()
    pr_predictions_test  = pd.DataFrame()

    # loop through eta
    for eta in all_eta:

        # feedback
        print('--- eta: %.2f' % eta)
        colname = 'eta_' + str(eta)

        # fit PR
        debiased_model = PrejudiceRemover(eta = eta, sensitive_attr = protected, class_attr = 'TARGET')
        debiased_model.fit(data_train)

        # predict validation scores
        # ravel() flattens the score array in linear time; the former
        # sum(scores.tolist(), []) re-copied the growing list on every step (quadratic)
        dataset_debiasing_valid = debiased_model.predict(data_valid)
        scores = dataset_debiasing_valid.scores
        pr_predictions_valid[colname] = np.asarray(scores).ravel()

        # predict test scores
        dataset_debiasing_test = debiased_model.predict(data_test)
        scores = dataset_debiasing_test.scores
        pr_predictions_test[colname] = np.asarray(scores).ravel()

    # export CSV
    pr_predictions_valid.to_csv(res_path + 'intermediate/' + data + '_' + str(fold) + '_PR_predictions_valid.csv', index = None, header=True)
    pr_predictions_test.to_csv(res_path  + 'intermediate/' + data + '_' + str(fold) + '_PR_predictions_test.csv',  index = None, header=True)
    print('')

# print performance
print('')
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))

------------------------------
- FOLD 0...
------------------------------
--- eta: 1.00
--- eta: 5.00
--- eta: 15.00
--- eta: 30.00
--- eta: 50.00
--- eta: 70.00
--- eta: 100.00
--- eta: 150.00

------------------------------
- FOLD 1...
------------------------------
--- eta: 1.00
--- eta: 5.00
--- eta: 15.00
--- eta: 30.00
--- eta: 50.00
--- eta: 70.00
--- eta: 100.00
--- eta: 150.00

------------------------------
- FOLD 2...
------------------------------
--- eta: 1.00
--- eta: 5.00
--- eta: 15.00
--- eta: 30.00
--- eta: 50.00
--- eta: 70.00
--- eta: 100.00
--- eta: 150.00

------------------------------
- FOLD 3...
------------------------------
--- eta: 1.00
--- eta: 5.00
--- eta: 15.00
--- eta: 30.00
--- eta: 50.00
--- eta: 70.00
--- eta: 100.00
--- eta: 150.00

------------------------------
- FOLD 4...
------------------------------
--- eta: 1.00
--- eta: 5.00
--- eta: 15.00
--- eta: 30.00
--- eta: 50.00
--- eta: 70.00
--- eta: 100.00
--- eta: 150.00


Finished in 207.67 minut

In [8]:
##### MODELING: META-ALGORITHM
#
# For every fold: load the scaled train/valid/test subsets, fit one
# MetaFairClassifier per tau in all_tau, and export the test and validation
# scores (one column per tau) as CSV files. If fitting raises
# ZeroDivisionError, the model fitted for the previous tau of the SAME fold
# is reused.

# timer
cv_start = time.time()

# loop through folds
for fold in range(num_folds):

    ##### LOAD DATA

    # feedback
    print('-'*30)
    print('- FOLD ' + str(fold) + '...')
    print('-'*30)

    # import data subsets; context managers close each file handle right after loading
    # NOTE(review): pickle.load can execute arbitrary code; only load files written by this project
    fold_prefix = data_path + 'prepared/' + data + '_scaled_' + str(fold)
    with open(fold_prefix + '_train.pkl', 'rb') as handle:
        data_train = pickle.load(handle)
    with open(fold_prefix + '_valid.pkl', 'rb') as handle:
        data_valid = pickle.load(handle)
    with open(fold_prefix + '_test.pkl', 'rb') as handle:
        data_test = pickle.load(handle)


    ##### MODELING

    # placeholders: one score column per tau
    meta_predictions_test  = pd.DataFrame()
    meta_predictions_valid = pd.DataFrame()

    # fallback model for a failed fit; reset for every fold so that a model
    # trained on another fold's data is never reused (would leak data), and so
    # that a failure on the very first tau no longer ends in a NameError
    last_dm = None

    # loop through tau
    for tau in all_tau:

        # feedback
        print('--- tau: %.2f' % tau)
        colname = 'tau_' + str(tau)

        # fit meta algorithm
        debiased_model = MetaFairClassifier(tau = tau, sensitive_attr = protected)
        try:
            debiased_model.fit(data_train)
        except ZeroDivisionError as err:
            if last_dm is None:
                # nothing fitted in this fold yet: fail loudly instead of guessing
                raise RuntimeError(
                    'MetaFairClassifier failed for tau = ' + str(tau) + ' in fold ' + str(fold) +
                    ' and no model from an earlier tau of this fold is available as a fallback'
                ) from err
            print('---- Error, using previous tau')
            debiased_model = last_dm

        # predict test scores
        # ravel() flattens the score array in linear time; the former
        # sum(scores.tolist(), []) re-copied the growing list on every step (quadratic)
        dataset_debiasing_test = debiased_model.predict(data_test)
        scores_test            = dataset_debiasing_test.scores
        meta_predictions_test[colname] = np.asarray(scores_test).ravel()

        # predict validation scores
        dataset_debiasing_valid = debiased_model.predict(data_valid)
        scores_valid            = dataset_debiasing_valid.scores
        meta_predictions_valid[colname] = np.asarray(scores_valid).ravel()

        # save model as fallback for the next tau of this fold
        last_dm = debiased_model

    # export CSV
    meta_predictions_test.to_csv(res_path  + 'intermediate/' + data + '_' + str(fold) + '_MA_predictions_test.csv',  index = None, header=True)
    meta_predictions_valid.to_csv(res_path + 'intermediate/' + data + '_' + str(fold) + '_MA_predictions_valid.csv', index = None, header=True)
    print('')

# print performance
print('')
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))

------------------------------
- FOLD 0...
------------------------------
--- tau: 0.05
---- Training Accuracy:  0.7361785714285713 , Training gamma:  0.7128741862630495
--- tau: 0.10
---- Training Accuracy:  0.7281071428571428 , Training gamma:  0.7231865093666534
--- tau: 0.15
---- Training Accuracy:  0.7371071428571428 , Training gamma:  0.7111514200986092
--- tau: 0.20
---- Training Accuracy:  0.7376071428571429 , Training gamma:  0.7132916376814403
--- tau: 0.25
---- Training Accuracy:  0.7376071428571429 , Training gamma:  0.7132916376814403
--- tau: 0.30
---- Training Accuracy:  0.7376071428571429 , Training gamma:  0.7132916376814403

------------------------------
- FOLD 1...
------------------------------
--- tau: 0.05
---- Training Accuracy:  0.7363214285714286 , Training gamma:  0.7295635998282894
--- tau: 0.10
---- Training Accuracy:  0.7224642857142858 , Training gamma:  0.7333522753932216
--- tau: 0.15
---- Training Accuracy:  0.73725 , Training gamma:  0.721129261577547

KeyboardInterrupt: 