# FAIR IN-PROCESSING

This notebook implements the following in-processors:
- Meta-Fairness Algorithm [(Celis et al. 2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287586?casa_token=VdBhACPUHUYAAAAA:D8-vlR7Vf5QVQXyYhHB23IBjO0xrKQH64wztDghcSCUpaUwwkWeMZ2Cqu76yjLvSCVhzpjleAAnJ)

A further analysis of the processor outputs is performed in `code_06_inprocess3.R`.

The notebook loads the data exported in `code_00_partitinoing.ipynb` and applies in-processors. The processor predictions are exported as CSV files.

In [1]:
##### PACKAGES

# working paths
%run code_00_working_paths.py

import pickle
import numpy as np
import time

from aif360.metrics import BinaryLabelDatasetMetric
from aif360.algorithms.inprocessing import MetaFairClassifier, PrejudiceRemover
from aif360.algorithms.preprocessing import Reweighing, LFR, DisparateImpactRemover
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MaxAbsScaler

import matplotlib.pyplot as plt

import sys
sys.path.append(func_path)

from load_data import load_dataset
import os

pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


## 1. Parameters and preparations

In [2]:
##### PARAMETERS

# sepcify data set
# one of ['data1', 'data2', ..., 'data50']
data = 'data1' 

# partitioning
num_folds  = 5
seed       = 1

In [3]:
##### IN-PROCESSOR PARAMS

all_tau = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]

## 2. Data import

In [4]:
##### RANDOM SEED

np.random.seed(seed)

In [5]:
# Assuming data is in the format 'dataN' where N is the dataset number
dataset_number = data[4:]

# Create the directory path (assuming directories data1, data2, ..., data50 already exist)
input_dir = os.path.join(data_path, 'prepared', 'data' + dataset_number)

# Construct the full file path
file_path = os.path.join(input_dir, data + '_orig_test.pkl')

# Load the dataset
with open(file_path, 'rb') as file:
    dataset_orig_test = pickle.load(file)
    
# Convert to dataframe and print the shape
te = dataset_orig_test.convert_to_dataframe()[0]
print(te.shape)

(240, 6)


In [6]:
##### DATA PREP

# protected attribute
protected           = 'race'
privileged_groups   = [{'race': 1}] 
unprivileged_groups = [{'race': 0}]

## 3. Fair processing

In [7]:
import pandas as pd

In [8]:
##### MODELING: META-ALGORITHM

# timer
cv_start = time.time()

# Assuming data is in the format 'dataN' where N is the dataset number
dataset_number = data[4:]

# Create the directory path (assuming directories data1, data2, ..., data50 already exist)/
input_dir = os.path.join(data_path, 'prepared', 'data' + dataset_number)
output_dir = os.path.join(res_path, 'inprocess1', 'intermediate', 'data' + dataset_number)

# loop through folds
for fold in range(num_folds):
    
    ##### LOAD DATA

    # feedback
    print('-'*30)
    print('- FOLD ' + str(fold) + '...')
    print('-'*30)
    
    # import data subsets
    train_path = os.path.join(input_dir, data + '_scaled_' + str(fold) + '_train.pkl')
    valid_path = os.path.join(input_dir, data + '_scaled_' + str(fold) + '_valid.pkl')
    test_path = os.path.join(input_dir, data + '_scaled_' + str(fold) + '_test.pkl')
        
    # Load the dataset
    with open(train_path, 'rb') as file:
        data_train = pickle.load(file)
    with open(valid_path, 'rb') as file:
        data_valid = pickle.load(file)
    with open(test_path, 'rb') as file:
        data_test = pickle.load(file)

    ##### MODELING

    # placeholders
    meta_predictions_test  = pd.DataFrame()
    meta_predictions_valid = pd.DataFrame()

    # loop through tau
    for tau in all_tau:
        
        # feedback
        print('--- tau: %.2f' % tau)
        colname = 'tau_' + str(tau)

        # fit meta algorithm
        debiased_model = MetaFairClassifier(tau = tau, sensitive_attr = protected)
        try:
            debiased_model.fit(data_train)
        except ZeroDivisionError:
            print('---- Error, using previous tau')
            debiased_model = last_dm

        # predict test scores
        dataset_debiasing_test = debiased_model.predict(data_test)
        scores_test            = dataset_debiasing_test.scores
        meta_predictions_test[colname] = sum(scores_test.tolist(), [])
        
        # predict validation scores
        dataset_debiasing_valid = debiased_model.predict(data_valid)
        scores_valid            = dataset_debiasing_valid.scores
        meta_predictions_valid[colname] = sum(scores_valid.tolist(), [])
        
        # save model
        last_dm = debiased_model

    # export CSV
    meta_predictions_test.to_csv(os.path.join(output_dir, data + '_' +  str(fold) + '_MA_predictions_test.csv'),  index = None, header=True)
    meta_predictions_valid.to_csv(os.path.join(output_dir, data + '_' + str(fold) + '_MA_predictions_valid.csv'), index = None, header=True)
    print('')

# print performance
print('')
print('Finished in {:.2f} minutes'.format((time.time() - cv_start) / 60))

------------------------------
- FOLD 0...
------------------------------
--- tau: 0.05


  prob_y_1 = (prob_1_1 + prob_1_0) / total
  prob_z_0 = (prob_m1_0 + prob_1_0) / total
  prob_z_1 = (prob_m1_1 + prob_1_1) / total
  probc_m1_0 = prob_m1_0 / total
  probc_m1_1 = prob_m1_1 / total


TypeError: cannot unpack non-iterable NoneType object