# Pre1 + in2
1. Reweighing + Adversarial Debiasing
2. Disparate Impact Remover + Adversarial Debiasing

In [1]:
##### PACKAGES

# working paths
%run code_00_working_paths.py

import pickle
import numpy as np
import time
import tensorflow as tf

from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.algorithms.preprocessing.reweighing import Reweighing
from aif360.algorithms.preprocessing.lfr import LFR
from aif360.algorithms.preprocessing import DisparateImpactRemover
from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing
from aif360.algorithms.inprocessing import PrejudiceRemover

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MaxAbsScaler

import matplotlib.pyplot as plt
import os
import pandas as pd

import sys
sys.path.append(func_path)

from load_data import load_dataset

pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


## Parameters and preparations

In [2]:
##### PARAMETERS

# data set to process: one of 'data1', 'data2', ..., 'data50'
data = 'data1'

# fold partitioning
num_folds = 10  # number of folds looped over by the modeling cells
seed = 1        # value handed to np.random.seed
use_fold = 0    # index of a single fold: one of 0, 1, ..., num_folds - 1

In [3]:
##### PRE-PROCESSOR PARAMS

# repair levels tried for the Disparate Impact Remover: 0.5, 0.6, ..., 1.0
# (each value is also embedded via str() in the names of the prediction files)
all_lambda = [tenths / 10 for tenths in range(5, 11)]

In [4]:
##### IN-PROCESSOR PARAMS

# weight of the adversary loss, handed to AdversarialDebiasing(adversary_loss_weight=...)
adversary_loss_weight = 0.1 # candidate values: [0.1, 0.01, 0.001]

## Data import

In [5]:
##### RANDOM SEED

# seed NumPy's global random number generator with the `seed` parameter;
# no TensorFlow-level seed is set in this cell
np.random.seed(seed)

In [6]:
##### LOAD PARTITIONING

# numeric suffix of the data set name, i.e. the N in 'dataN'
dataset_number = data[len('data'):]

# all prepared data sets are read from the 'prepared' sub-folder of data_path
input_dir = os.path.join(data_path, 'prepared')

# pickled original test set of the selected data set
file_path = os.path.join(input_dir, f'{data}_orig_test.pkl')

# NOTE: pickle.load can execute arbitrary code; only open files produced by this project
with open(file_path, 'rb') as pkl_file:
    dataset_orig_test = pickle.load(pkl_file)

# first element returned by convert_to_dataframe(); report its shape as a sanity check
te = dataset_orig_test.convert_to_dataframe()[0]
print(te.shape)

(240, 6)


In [7]:
##### DATA PREP

# name of the protected attribute
protected = 'race'

# group definitions keyed by the protected attribute:
# value 1 marks the privileged group, value 0 the unprivileged group
privileged_groups = [{protected: 1}]
unprivileged_groups = [{protected: 0}]

## Fair processing

In [8]:
##### MODELING: RW and AD
# For every fold: reweigh the training data (Reweighing), train an
# AdversarialDebiasing classifier on it and store its scores for the
# validation and test splits as CSV files in output_dir.

# AdversarialDebiasing is driven through a TensorFlow 1.x session (`sess=`),
# so the v1 compatibility module is bound to the name `tf` in this cell.
import tensorflow.compat.v1 as tf

# switch TensorFlow to 1.x behaviour (this also turns eager execution off)
tf.disable_v2_behavior()


def safe_normalize(x):
    """Return ``x`` divided by its norm, or ``x`` unchanged when the norm is 0.

    NOTE(review): nothing in this cell calls this helper; it is kept only so
    that any code referring to the name keeps working.
    """
    norm = tf.norm(x)
    return tf.cond(tf.not_equal(norm, 0), lambda: x / norm, lambda: x)


# timer
cv_start = time.time()

# results folder for this pre-/in-processor combination
output_dir = os.path.join(res_path, 'pre1in2')
os.makedirs(output_dir, exist_ok=True)

# preprocessing and inprocessing loop
print('-' * 30)
print('- METHOD: RW...')
print('-' * 30)

# loop through folds
for fold in range(num_folds):

    # feedback
    print('-' * 30)
    print('- FOLD ' + str(fold) + '...')
    print('-' * 30)

    ##### LOAD DATA
    # common prefix of the three pickled subsets of this fold
    fold_prefix = os.path.join(input_dir, data + '_scaled_' + str(fold))

    # NOTE: pickle.load can execute arbitrary code; only open files produced by this project
    with open(fold_prefix + '_train.pkl', 'rb') as file:
        data_train = pickle.load(file)
    with open(fold_prefix + '_valid.pkl', 'rb') as file:
        data_valid = pickle.load(file)
    with open(fold_prefix + '_test.pkl', 'rb') as file:
        data_test = pickle.load(file)

    ##### PRE-PROCESSOR: REWEIGHING
    # fitted on the training subset only, then applied to all three subsets
    RW = Reweighing(unprivileged_groups=unprivileged_groups, privileged_groups=privileged_groups)
    RW.fit(data_train)

    dataset_transf_train = RW.transform(data_train)
    dataset_transf_valid = RW.transform(data_valid)
    dataset_transf_test = RW.transform(data_test)

    ##### IN-PROCESSOR: ADVERSARIAL DEBIASING
    # Every fold gets its own graph and session. Previously one session was
    # shared by all folds and each fold's model was rebuilt under the same
    # variable scope (reuse=AUTO_REUSE) in the same graph; the recorded run of
    # this cell aborted with "ValueError: None values not supported.".
    # NOTE(review): the shared graph is the presumed cause - confirm on re-run.
    with tf.Graph().as_default(), tf.Session() as sess:
        debiased_model = AdversarialDebiasing(privileged_groups=privileged_groups,
                                              unprivileged_groups=unprivileged_groups,
                                              debias=True,
                                              adversary_loss_weight=adversary_loss_weight,
                                              scope_name='debiased_classifier',
                                              sess=sess)
        debiased_model.fit(dataset_transf_train)

        # the model can only be queried while its session is open
        scores_valid = debiased_model.predict(dataset_transf_valid).scores
        scores_test = debiased_model.predict(dataset_transf_test).scores

    # validation predictions: scores together with the true labels
    advdebias_predictions = pd.DataFrame({'scores': scores_valid.flatten(),
                                          'targets': dataset_transf_valid.labels.flatten()})
    advdebias_predictions.to_csv(os.path.join(output_dir, data + '_' + str(fold) + '_AD_RW_predictions_valid.csv'),
                                 index=False,
                                 header=True)

    # test predictions: scores only
    advdebias_predictions = pd.DataFrame({'scores': scores_test.flatten()})
    advdebias_predictions.to_csv(os.path.join(output_dir, data + '_' + str(fold) + '_AD_RW_predictions_test.csv'),
                                 index=False,
                                 header=True)

    print('')

##### END LOOP

# feedback
print('')

# print performance
print('')
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))







Instructions for updating:
non-resource variables are not supported in the long term


Instructions for updating:
non-resource variables are not supported in the long term








------------------------------
- METHOD: RW...
------------------------------
------------------------------
- FOLD 0...
------------------------------
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.








epoch 0; iter: 0; batch classifier loss: 0.695921; batch adversarial loss: 0.881330
epoch 1; iter: 0; batch classifier loss: 0.684307; batch adversarial loss: 0.902722
epoch 2; iter: 0; batch classifier loss: 0.715801; batch adversarial loss: 0.914391
epoch 3; iter: 0; batch classifier loss: 0.720084; batch adversarial loss: 0.919068
epoch 4; iter: 0; batch classifier loss: 0.735882; batch adversarial loss: 0.929588
epoch 5; iter: 0; batch classifier loss: 0.725645; batch adversarial loss: 0.917140
epoch 6; iter: 0; batch classifier loss: 0.771810; batch adversarial loss: 0.941367
epoch 7; iter: 0; batch classifier loss: 0.784690; batch adversarial loss: 0.953206
epoch 8; iter: 0; batch classifier loss: 0.811011; batch adversarial loss: 0.964823
epoch 9; iter: 0; batch classifier loss: 0.833914; batch adversarial loss: 0.937362
epoch 10; iter: 0; batch classifier loss: 0.839917; batch adversarial loss: 0.941357
epoch 11; iter: 0; batch classifier loss: 0.850234; batch adversarial loss:

ValueError: None values not supported.

In [None]:
##### MODELING: DI and AD
# For every fold and every repair level in all_lambda: repair the data with the
# Disparate Impact Remover, train an AdversarialDebiasing classifier on the
# repaired training subset and store its scores for the validation and test
# subsets as CSV files in output_dir.

# AdversarialDebiasing is driven through a TensorFlow 1.x session (`sess=`),
# so the v1 compatibility module is bound to the name `tf` in this cell.
import tensorflow.compat.v1 as tf

# switch TensorFlow to 1.x behaviour (this also turns eager execution off)
tf.disable_v2_behavior()

##### MODELING

# timer
cv_start = time.time()

# results folder for this pre-/in-processor combination
output_dir = os.path.join(res_path, 'pre1in2')
os.makedirs(output_dir, exist_ok=True)

# preprocessing and inprocessing loop
print('-' * 30)
print('- METHOD: DI...')
print('-' * 30)

# loop through folds
for fold in range(num_folds):

    # feedback
    print('-' * 30)
    print('- FOLD ' + str(fold) + '...')
    print('-' * 30)

    ##### LOAD DATA
    # common prefix of the three pickled subsets of this fold
    fold_prefix = os.path.join(input_dir, data + '_scaled_' + str(fold))

    # NOTE: pickle.load can execute arbitrary code; only open files produced by this project
    with open(fold_prefix + '_train.pkl', 'rb') as file:
        data_train = pickle.load(file)
    with open(fold_prefix + '_valid.pkl', 'rb') as file:
        data_valid = pickle.load(file)
    with open(fold_prefix + '_test.pkl', 'rb') as file:
        data_test = pickle.load(file)

    # loop through repair levels
    for repair_level in all_lambda:

        ##### PRE-PROCESSOR: DISPARATE IMPACT REMOVER
        di = DisparateImpactRemover(repair_level=repair_level, sensitive_attribute=protected)

        # each subset is repaired separately through fit_transform
        dataset_transf_train = di.fit_transform(data_train)
        dataset_transf_valid = di.fit_transform(data_valid)
        dataset_transf_test = di.fit_transform(data_test)

        ##### IN-PROCESSOR: ADVERSARIAL DEBIASING
        # Every model gets its own graph and session, so a model never shares
        # variables or graph nodes with the ones trained before it (previously
        # one session and one variable scope with reuse=AUTO_REUSE were shared
        # by all folds and repair levels). The session is closed on exit.
        with tf.Graph().as_default(), tf.Session() as sess:
            debiased_model = AdversarialDebiasing(privileged_groups=privileged_groups,
                                                  unprivileged_groups=unprivileged_groups,
                                                  debias=True,
                                                  adversary_loss_weight=adversary_loss_weight,
                                                  scope_name='debiased_classifier',
                                                  sess=sess)
            debiased_model.fit(dataset_transf_train)

            # the model can only be queried while its session is open
            scores_valid = debiased_model.predict(dataset_transf_valid).scores
            scores_test = debiased_model.predict(dataset_transf_test).scores

        # validation predictions: scores together with the true labels
        advdebias_predictions = pd.DataFrame({'scores': scores_valid.flatten(),
                                              'targets': dataset_transf_valid.labels.flatten()})
        advdebias_predictions.to_csv(os.path.join(output_dir, data + '_' + str(fold) + '_AD_DI_' + str(repair_level) + '_predictions_valid.csv'),
                                     index=False,
                                     header=True)

        # test predictions: scores only
        advdebias_predictions = pd.DataFrame({'scores': scores_test.flatten()})
        advdebias_predictions.to_csv(os.path.join(output_dir, data + '_' + str(fold) + '_AD_DI_' + str(repair_level) + '_predictions_test.csv'),
                                     index=False,
                                     header=True)
        print('')

##### END LOOP

# feedback
print('')

# print performance
print('')
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))