# In-Processing

<img src="../images/in-processing.png" alt="Drawing" style="width: 600px;"/>

In [1]:
from aif360.metrics.classification_metric import ClassificationMetric
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import warnings
import joblib

from utils import make_dataset, display_results

warnings.filterwarnings('ignore')

PRIVILEGED_INFO = {'unprivileged_groups':[{'race': 2},
                                          {'race': 1},
                                          {'race': 4},
                                          {'race': 5},
                                          {'race': 6}],
                   'privileged_groups':[{'race': 3}]
                  }

BIAS_INFO = {'favorable_label':0,
             'unfavorable_label':1,
             'protected_columns':['race'],
            }

data = pd.read_csv('../data/processed/compas-scores-two-years-processed.csv')

DROP_COLS = ['two_year_recid','compas_score','decile_score','compas_class']
FEATURE_COLS = data.drop(DROP_COLS, axis=1).columns.tolist()

train, test = train_test_split(data, test_size=0.2, random_state=1234)

# Prejudice Remover

## Method

TODO

## Pros and Cons

TODO

## Materials

* Paper ["Fairness-awere data mining" by Kamishima](http://www.kamishima.net/archive/fadm.pdf)

In [2]:
from aif360.algorithms.inprocessing import PrejudiceRemover

ground_truth_train = make_dataset(train[FEATURE_COLS], train['two_year_recid'], **BIAS_INFO, **PRIVILEGED_INFO)
ground_truth_test = make_dataset(test[FEATURE_COLS], test['two_year_recid'], **BIAS_INFO, **PRIVILEGED_INFO)

clf = PrejudiceRemover(eta=0.1,sensitive_attr='race')

clf.fit(ground_truth_train)
prediction_test = clf.predict(ground_truth_test)

# # Problem with AIF360 implementation
acc = accuracy_score(test['two_year_recid'], (prediction_test.labels==0).astype(int))

clf_metric = ClassificationMetric(ground_truth_test, prediction_test,**PRIVILEGED_INFO)

In [3]:
joblib.dump((clf_metric,acc), '../results/1.3-prejudice_remover.pkl')
display_results('../results/1.3-prejudice_remover.pkl')

Unnamed: 0,metric_names,scores
0,accuracy_score,0.669439
1,true_positive_rate_difference,0.100579
2,false_positive_rate_difference,0.230618
3,false_omission_rate_difference,0.035059
4,false_discovery_rate_difference,0.029225
5,error_rate_difference,0.028258
6,false_positive_rate_ratio,1.654547
7,false_negative_rate_ratio,0.882588
8,false_omission_rate_ratio,1.053395
9,false_discovery_rate_ratio,1.046415

Unnamed: 0,metric_names,scores
0,accuracy_score,0.669439
1,true_positive_rate_difference,0.100579
2,false_positive_rate_difference,0.230618
3,false_omission_rate_difference,0.035059
4,false_discovery_rate_difference,0.029225
5,error_rate_difference,0.028258
6,false_positive_rate_ratio,1.654547
7,false_negative_rate_ratio,0.882588
8,false_omission_rate_ratio,1.053395
9,false_discovery_rate_ratio,1.046415


# Adversarial Fairness

## Method

<img src="../images/adversarial_fairness.png" alt="Drawing" style="width: 600px;"/>

* Classifier predicts class score
* Adversarial predicts sensitive attribute ('race') from classifier predicted class score ('will re-offend')
* Model optimizes class prediction loss (minimize) and attribute prediction loss (maximize)

<img src="../images/adversarial_fairness_training.gif" alt="Drawing" style="width: 600px;"/>

## Pros and Cons

## Materials

* Paper ["Mitigating Unwanted Biases with Adversarial Learning" by Zhang, Lemoine, Mitchell](https://arxiv.org/abs/1801.07593)
* Blog post ["Qualitative model fairness" by godatadriven.com](https://blog.godatadriven.com/fairness-in-ml)

In [2]:
from aif360.algorithms.inprocessing import AdversarialDebiasing
import tensorflow as tf

def collapse_group(x):
    if x!=3:
        return 0
    else:
        return 1

train_ = train.copy()
test_ = test.copy()

train_['race'] = train_['race'].apply(collapse_group)
test_['race'] = test_['race'].apply(collapse_group)

PRIVILEGED_INFO = {'unprivileged_groups':[{'race': 0}],
                   'privileged_groups':[{'race': 1}]
                  }

ground_truth_train = make_dataset(train_[FEATURE_COLS], train_['two_year_recid'], **BIAS_INFO, **PRIVILEGED_INFO)
ground_truth_test = make_dataset(test_[FEATURE_COLS], test_['two_year_recid'], **BIAS_INFO, **PRIVILEGED_INFO)

sess = tf.Session()

model_params = {'classifier_num_hidden_units':200, 
                'num_epochs':20,
                'batch_size':128,
                'adversary_loss_weight':0.1,
                'debias':True,
                'seed':1234,
               }

clf = AdversarialDebiasing(scope_name='debiased_classifier', sess=sess, **PRIVILEGED_INFO, **model_params)

clf.fit(ground_truth_train)
prediction_test = clf.predict(ground_truth_test)

acc = accuracy_score(test_['two_year_recid'], prediction_test.labels)
clf_metric = ClassificationMetric(ground_truth_test, prediction_test, **PRIVILEGED_INFO)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
epoch 0; iter: 0; batch classifier loss: 1.961986; batch adversarial loss: 0.841276
epoch 1; iter: 0; batch classifier loss: 0.814475; batch adversarial loss: 0.737933
epoch 2; iter: 0; batch classifier loss: 0.979994; batch adversarial loss: 0.718240
epoch 3; iter: 0; batch classifier loss: 0.695990; batch adversarial loss: 0.695691
epoch 4; iter: 0; batch classifier loss: 0.686247; batch adversarial loss: 0.741852
epoch 5; iter: 0; batch classifier loss: 0.696875; batch adversarial loss: 0.669520
epoch 6; iter: 0; batch classifier loss: 0.819909; batch adversarial loss

In [3]:
joblib.dump((clf_metric,acc), '../results/1.3-adversarial_fairness.pkl')
display_results('../results/1.3-adversarial_fairness.pkl')

Unnamed: 0,metric_names,scores
0,accuracy_score,0.673597
1,true_positive_rate_difference,-0.064877
2,false_positive_rate_difference,-0.124687
3,false_omission_rate_difference,-0.021452
4,false_discovery_rate_difference,-0.004735
5,error_rate_difference,-0.006101
6,false_positive_rate_ratio,0.770814
7,false_negative_rate_ratio,1.354913
8,false_omission_rate_ratio,0.941533
9,false_discovery_rate_ratio,0.984983

Unnamed: 0,metric_names,scores
0,accuracy_score,0.673597
1,true_positive_rate_difference,-0.064877
2,false_positive_rate_difference,-0.124687
3,false_omission_rate_difference,-0.021452
4,false_discovery_rate_difference,-0.004735
5,error_rate_difference,-0.006101
6,false_positive_rate_ratio,0.770814
7,false_negative_rate_ratio,1.354913
8,false_omission_rate_ratio,0.941533
9,false_discovery_rate_ratio,0.984983


## Notes

* It predicts classes not scores!

In [10]:
sess.close()
tf.reset_default_graph()