# FAIR IN-PROCESSING

This notebook implements the following in-processors:
- Prejudice Remover [(Kamishima et al. 2012)](https://link.springer.com/chapter/10.1007/978-3-642-33486-3_3)

A further analysis of the processor outputs is performed in `code_06_inprocess3.R`.

The notebook loads the data exported in `code_00_partitioning.ipynb` and applies in-processors. The processor predictions are exported as CSV files.

In [21]:
##### PACKAGES

# working paths
%run code_00_working_paths.py

import pickle
import numpy as np
import time

import tensorflow as tf

from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing
from aif360.algorithms.inprocessing import PrejudiceRemover


from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MaxAbsScaler

import matplotlib.pyplot as plt
import os

import sys
sys.path.append(func_path)

from load_data import load_dataset

## 1. Parameters and preparations

In [22]:
##### PARAMETERS

# sepcify data set
# one of ['data1', 'data2', ..., 'data50']
data = 'data1'

# partitioning
num_folds  = 10
seed       = 1

In [23]:
##### IN-PROCESSOR PARAMS

all_eta = [1, 15, 50, 70, 100]

## 2. Data import

In [24]:
##### RANDOM SEED

np.random.seed(seed)

In [25]:
# Assuming data is in the format 'dataN' where N is the dataset number
dataset_number = data[4:]

# Create the directory path (assuming directories data1, data2, ..., data50 already exist)
input_dir = os.path.join(data_path, 'prepared')

# Construct the full file path
file_path = os.path.join(input_dir, data + '_orig_test.pkl')

# Load the dataset
with open(file_path, 'rb') as file:
    dataset_orig_test = pickle.load(file)
    
# Convert to dataframe and print the shape
te = dataset_orig_test.convert_to_dataframe()[0]
print(te.shape)

(240, 6)


In [26]:
##### DATA PREP

# protected attribute
protected           = 'race'
privileged_groups   = [{'race': 1}] 
unprivileged_groups = [{'race': 0}]

## 3. Fair processing

In [27]:
##### MODELING: PREJUDICE REMOVER
import pandas as pd

# timer
cv_start = time.time()

# Assuming data is in the format 'dataN' where N is the dataset number
dataset_number = data[4:]

# Create the directory path (assuming directories data1, data2, ..., data50 already exist)/
input_dir = os.path.join(data_path, 'prepared')
output_dir = os.path.join(res_path, 'inprocess1', 'intermediate')

# loop through folds
for fold in range(num_folds):
    
    ##### LOAD DATA

    # feedback
    print('-'*30)
    print('- FOLD ' + str(fold) + '...')
    print('-'*30)

    # import data subsets
    train_path = os.path.join(input_dir, data + '_scaled_' + str(fold) + '_train.pkl')
    valid_path = os.path.join(input_dir, data + '_scaled_' + str(fold) + '_valid.pkl')
    test_path = os.path.join(input_dir, data + '_scaled_' + str(fold) + '_test.pkl')
        
    # Load the dataset
    with open(train_path, 'rb') as file:
        data_train = pickle.load(file)
    with open(valid_path, 'rb') as file:
        data_valid = pickle.load(file)
    with open(test_path, 'rb') as file:
        data_test = pickle.load(file)


    ##### MODELING

    # placeholders
    pr_predictions_valid = pd.DataFrame()
    pr_predictions_test  = pd.DataFrame()

    # loop through eta
    for eta in all_eta:
        
        # feedback
        print('--- eta: %.2f' % eta)
        colname = 'eta_' + str(eta)

        # fit PR
        debiased_model = PrejudiceRemover(eta = eta, sensitive_attr = protected, class_attr = 'target')
        debiased_model.fit(data_train)

        # predict validation scores
        dataset_debiasing_valid       = debiased_model.predict(data_valid)
        scores                        = dataset_debiasing_valid.scores
        pr_predictions_valid[colname] = sum(scores.tolist(), [])

        # predict test scores
        dataset_debiasing_test       = debiased_model.predict(data_test)
        scores                       = dataset_debiasing_test.scores
        pr_predictions_test[colname] = sum(scores.tolist(), [])

    # export CSV
    pr_predictions_valid.to_csv(os.path.join(output_dir, data + '_' +  str(fold) + '_PR_predictions_valid.csv'), index = None, header=True)
    pr_predictions_test.to_csv(os.path.join(output_dir, data + '_' + str(fold) + '_PR_predictions_test.csv'),  index = None, header=True)
    print('')
  
    
# print performance
print('')
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))

------------------------------
- FOLD 0...
------------------------------
--- eta: 1.00


AttributeError: 'DataFrame' object has no attribute 'features'