# Post-Processing

<img src="../images/post-processing.png" alt="Drawing" style="width: 600px;"/>

In [5]:
from aif360.metrics.classification_metric import ClassificationMetric
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import warnings

import joblib
from utils import make_dataset, display_results

warnings.filterwarnings('ignore')

BIAS_INFO = {'favorable_label':0,
             'unfavorable_label':1,
             'protected_columns':['race']
            }

PRIVILEGED_INFO = {'unprivileged_groups':[{'race': 2},
                                          {'race': 1},
                                          {'race': 4},
                                          {'race': 5},
                                          {'race': 6}],
                   'privileged_groups':[{'race': 3}]
                  }

data = pd.read_csv('../data/processed/compas-scores-two-years-processed.csv')

DROP_COLS = ['two_year_recid','compas_score','decile_score','compas_class']
FEATURE_COLS = data.drop(DROP_COLS, axis=1).columns.tolist()

train, test = train_test_split(data, test_size=0.2, random_state=1234)

X_train, y_train = train[FEATURE_COLS], train['two_year_recid']
X_test, y_test = test[FEATURE_COLS], test['two_year_recid']

clf = LogisticRegression(random_state=1234)
clf.fit(X_train, y_train)

y_train_pred = clf.predict_proba(X_train)
train['recid_prediction_score'] = y_train_pred[:,1]
train['recid_prediction_class'] = (train['recid_prediction_score'] >0.5).astype(int)

y_test_pred = clf.predict_proba(X_test)
test['recid_prediction_score'] = y_test_pred[:,1]
test['recid_prediction_class'] = (test['recid_prediction_score'] >0.5).astype(int)

In [15]:
ground_truth_train = make_dataset(train[FEATURE_COLS], train['two_year_recid'], **BIAS_INFO, **PRIVILEGED_INFO)
prediction_train = make_dataset(train[FEATURE_COLS], train['recid_prediction_class'], **BIAS_INFO, **PRIVILEGED_INFO)

ground_truth_test = make_dataset(test[FEATURE_COLS], test['two_year_recid'], **BIAS_INFO, **PRIVILEGED_INFO)
prediction_test = make_dataset(test[FEATURE_COLS], test['recid_prediction_class'], **BIAS_INFO, **PRIVILEGED_INFO)

# Equal Odds

## Method

## Pros and Cons

## Materials

In [16]:
from aif360.algorithms.postprocessing import EqOddsPostprocessing

calibrator = EqOddsPostprocessing(**PRIVILEGED_INFO)

calibrator.fit(ground_truth_train, prediction_train)
prediction_test = calibrator.predict(prediction_test)

roc_auc = roc_auc_score(y_test, prediction_test.labels)
clf_metric = ClassificationMetric(ground_truth_test, prediction_test,**PRIVILEGED_INFO)

In [17]:
joblib.dump((clf_metric,roc_auc), '../results/1.1-equal_odds.pkl')
display_results('../results/1.1-equal_odds.pkl')

Unnamed: 0,metric_names,scores
0,roc_auc_score,0.499815
1,true_positive_rate_difference,-0.003584
2,false_positive_rate_difference,0.002304
3,false_omission_rate_difference,-0.036625
4,false_discovery_rate_difference,1.0
5,error_rate_difference,-0.034915
6,false_positive_rate_ratio,inf
7,false_negative_rate_ratio,1.003597
8,false_omission_rate_ratio,0.937948
9,false_discovery_rate_ratio,inf

Unnamed: 0,metric_names,scores
0,roc_auc_score,0.499815
1,true_positive_rate_difference,-0.003584
2,false_positive_rate_difference,0.002304
3,false_omission_rate_difference,-0.036625
4,false_discovery_rate_difference,1.0
5,error_rate_difference,-0.034915
6,false_positive_rate_ratio,inf
7,false_negative_rate_ratio,1.003597
8,false_omission_rate_ratio,0.937948
9,false_discovery_rate_ratio,inf


# Calibrated Equal Odds

## Method

## Pros and Cons

## Materials

In [18]:
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing

calibrator = CalibratedEqOddsPostprocessing(**PRIVILEGED_INFO)

calibrator.fit(ground_truth_train, prediction_train)
prediction_test = calibrator.predict(prediction_test)

roc_auc = roc_auc_score(y_test, prediction_test.labels)
clf_metric = ClassificationMetric(ground_truth_test, prediction_test,**PRIVILEGED_INFO)

In [19]:
joblib.dump((clf_metric,roc_auc), '../results/1.1-calibrated_equal_odds.pkl')
display_results('../results/1.1-calibrated_equal_odds.pkl')

Unnamed: 0,metric_names,scores
0,roc_auc_score,0.646777
1,true_positive_rate_difference,-0.145452
2,false_positive_rate_difference,-0.340584
3,false_omission_rate_difference,-0.025175
4,false_discovery_rate_difference,-0.061631
5,error_rate_difference,-0.047208
6,false_positive_rate_ratio,0.543523
7,false_negative_rate_ratio,2.449322
8,false_omission_rate_ratio,0.930769
9,false_discovery_rate_ratio,0.830943

Unnamed: 0,metric_names,scores
0,roc_auc_score,0.646777
1,true_positive_rate_difference,-0.145452
2,false_positive_rate_difference,-0.340584
3,false_omission_rate_difference,-0.025175
4,false_discovery_rate_difference,-0.061631
5,error_rate_difference,-0.047208
6,false_positive_rate_ratio,0.543523
7,false_negative_rate_ratio,2.449322
8,false_omission_rate_ratio,0.930769
9,false_discovery_rate_ratio,0.830943


# Rejection Option

## Method

## Pros and Cons

## Materials

In [20]:
from aif360.algorithms.postprocessing import RejectOptionClassification

calibrator = RejectOptionClassification(**PRIVILEGED_INFO)

calibrator.fit(ground_truth_train, prediction_train)
prediction_test = calibrator.predict(prediction_test)

roc_auc = roc_auc_score(y_test, prediction_test.labels)
clf_metric = ClassificationMetric(ground_truth_test, prediction_test,**PRIVILEGED_INFO)

In [21]:
joblib.dump((clf_metric,roc_auc), '../results/1.1-rejection_option.pkl')
display_results('../results/1.1-rejection_option.pkl')

Unnamed: 0,metric_names,scores
0,roc_auc_score,0.646777
1,true_positive_rate_difference,-0.145452
2,false_positive_rate_difference,-0.340584
3,false_omission_rate_difference,-0.025175
4,false_discovery_rate_difference,-0.061631
5,error_rate_difference,-0.047208
6,false_positive_rate_ratio,0.543523
7,false_negative_rate_ratio,2.449322
8,false_omission_rate_ratio,0.930769
9,false_discovery_rate_ratio,0.830943

Unnamed: 0,metric_names,scores
0,roc_auc_score,0.646777
1,true_positive_rate_difference,-0.145452
2,false_positive_rate_difference,-0.340584
3,false_omission_rate_difference,-0.025175
4,false_discovery_rate_difference,-0.061631
5,error_rate_difference,-0.047208
6,false_positive_rate_ratio,0.543523
7,false_negative_rate_ratio,2.449322
8,false_omission_rate_ratio,0.930769
9,false_discovery_rate_ratio,0.830943
