<a href="https://colab.research.google.com/github/madhugraj/Abstractive-summarizor/blob/master/Adv_Bias_inProcess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install aif360
!pip install fairlearn


Collecting aif360
  Downloading aif360-0.4.0-py3-none-any.whl (175 kB)
[?25l[K     |█▉                              | 10 kB 20.7 MB/s eta 0:00:01[K     |███▊                            | 20 kB 26.8 MB/s eta 0:00:01[K     |█████▋                          | 30 kB 12.1 MB/s eta 0:00:01[K     |███████▌                        | 40 kB 9.3 MB/s eta 0:00:01[K     |█████████▍                      | 51 kB 5.1 MB/s eta 0:00:01[K     |███████████▎                    | 61 kB 5.4 MB/s eta 0:00:01[K     |█████████████                   | 71 kB 5.8 MB/s eta 0:00:01[K     |███████████████                 | 81 kB 6.5 MB/s eta 0:00:01[K     |████████████████▉               | 92 kB 4.7 MB/s eta 0:00:01[K     |██████████████████▊             | 102 kB 5.1 MB/s eta 0:00:01[K     |████████████████████▋           | 112 kB 5.1 MB/s eta 0:00:01[K     |██████████████████████▌         | 122 kB 5.1 MB/s eta 0:00:01[K     |████████████████████████▎       | 133 kB 5.1 MB/s eta 0:00:01[K  

In [2]:
%matplotlib inline

import sys
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector

from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german

from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.metrics import accuracy_score

from IPython.display import Markdown, display
import matplotlib.pyplot as plt

import tensorflow as tf

In [4]:
dataset_orig = load_preproc_data_adult()

privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

In [5]:
display(Markdown("#### Training Dataset shape"))
print(dataset_orig_train.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig_train.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig_train.privileged_protected_attributes, 
      dataset_orig_train.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig_train.feature_names)

#### Training Dataset shape

(34189, 18)


#### Favorable and unfavorable labels

1.0 0.0


#### Protected attribute names

['sex', 'race']


#### Privileged and unprivileged protected attribute values

[array([1.]), array([1.])] [array([0.]), array([0.])]


#### Dataset feature names

['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']


In [6]:
# Metric for the original dataset
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())
metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_test.mean_difference())

#### Original training dataset

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.196835
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.189073


In [7]:
min_max_scaler = MaxAbsScaler()
dataset_orig_train.features = min_max_scaler.fit_transform(dataset_orig_train.features)
dataset_orig_test.features = min_max_scaler.transform(dataset_orig_test.features)
metric_scaled_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                             unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)
display(Markdown("#### Scaled dataset - Verify that the scaling does not affect the group label statistics"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_scaled_train.mean_difference())
metric_scaled_test = BinaryLabelDatasetMetric(dataset_orig_test, 
                             unprivileged_groups=unprivileged_groups,
                             privileged_groups=privileged_groups)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_scaled_test.mean_difference())

#### Scaled dataset - Verify that the scaling does not affect the group label statistics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.196835
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.189073


In [12]:
# Load post-processing algorithm that equalizes the odds
# Learn parameters with debias set to False

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

sess = tf.Session()
plain_model = AdversarialDebiasing(privileged_groups = privileged_groups,
                          unprivileged_groups = unprivileged_groups,
                          scope_name='plain_classifier',
                          debias=False,
                          sess=sess)

Instructions for updating:
non-resource variables are not supported in the long term


In [13]:
plain_model.fit(dataset_orig_train)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
epoch 0; iter: 0; batch classifier loss: 0.695702
epoch 0; iter: 200; batch classifier loss: 0.411451
epoch 1; iter: 0; batch classifier loss: 0.432642
epoch 1; iter: 200; batch classifier loss: 0.478360
epoch 2; iter: 0; batch classifier loss: 0.402665
epoch 2; iter: 200; batch classifier loss: 0.420036
epoch 3; iter: 0; batch classifier loss: 0.425418
epoch 3; iter: 200; batch classifier loss: 0.425617
epoch 4; iter: 0; batch classifier loss: 0.427238
epoch 4; iter: 200; batch classifier loss: 0.336292
epoch 5; iter: 0; batch classifier loss: 0.472720
epoch 5; iter: 200; batch classifier loss: 0.377967
epoch 6; iter: 0; batch classifier loss: 0.507377
epoch 6; iter: 200; batch classifier loss: 0.382774
epoch 7; iter: 0; batch classifier loss: 0.333773
epoch 7; iter: 200; batch classifier loss: 0.440061
epoch 8; iter: 0; batch classifier loss: 0.392806
epoch 8; iter: 200;

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x7fb220f97fd0>

In [14]:
# Apply the plain model to test data
dataset_nodebiasing_train = plain_model.predict(dataset_orig_train)
dataset_nodebiasing_test = plain_model.predict(dataset_orig_test)

In [15]:
# Metrics for the dataset from plain model (without debiasing)
display(Markdown("#### Plain model - without debiasing - dataset metrics"))
metric_dataset_nodebiasing_train = BinaryLabelDatasetMetric(dataset_nodebiasing_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_train.mean_difference())

metric_dataset_nodebiasing_test = BinaryLabelDatasetMetric(dataset_nodebiasing_test, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_test.mean_difference())

display(Markdown("#### Plain model - without debiasing - classification metrics"))
classified_metric_nodebiasing_test = ClassificationMetric(dataset_orig_test, 
                                                 dataset_nodebiasing_test,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % classified_metric_nodebiasing_test.accuracy())
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_nodebiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_nodebiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_nodebiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_nodebiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_nodebiasing_test.theil_index())

#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.210607
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.218465


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.806183
Test set: Balanced classification accuracy = 0.664527
Test set: Disparate impact = 0.000000
Test set: Equal opportunity difference = -0.470103
Test set: Average odds difference = -0.290826
Test set: Theil_index = 0.173192


In [16]:
sess.close()
tf.reset_default_graph()
sess = tf.Session()

In [17]:
# Learn parameters with debias set to True
debiased_model = AdversarialDebiasing(privileged_groups = privileged_groups,
                          unprivileged_groups = unprivileged_groups,
                          scope_name='debiased_classifier',
                          debias=True,
                          sess=sess)

In [18]:
debiased_model.fit(dataset_orig_train)

epoch 0; iter: 0; batch classifier loss: 0.692086; batch adversarial loss: 0.667461
epoch 0; iter: 200; batch classifier loss: 0.492503; batch adversarial loss: 0.690601
epoch 1; iter: 0; batch classifier loss: 0.569376; batch adversarial loss: 0.757484
epoch 1; iter: 200; batch classifier loss: 0.578737; batch adversarial loss: 0.664236
epoch 2; iter: 0; batch classifier loss: 0.698937; batch adversarial loss: 0.684701
epoch 2; iter: 200; batch classifier loss: 0.694900; batch adversarial loss: 0.641905
epoch 3; iter: 0; batch classifier loss: 0.527155; batch adversarial loss: 0.672923
epoch 3; iter: 200; batch classifier loss: 0.358030; batch adversarial loss: 0.678453
epoch 4; iter: 0; batch classifier loss: 0.483289; batch adversarial loss: 0.582703
epoch 4; iter: 200; batch classifier loss: 0.413672; batch adversarial loss: 0.536004
epoch 5; iter: 0; batch classifier loss: 0.471024; batch adversarial loss: 0.591549
epoch 5; iter: 200; batch classifier loss: 0.502687; batch adversa

<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x7fb21bd6bbd0>

In [19]:
# Apply the plain model to test data
dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
dataset_debiasing_test = debiased_model.predict(dataset_orig_test)

In [20]:
# Metrics for the dataset from plain model (without debiasing)
display(Markdown("#### Plain model - without debiasing - dataset metrics"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_train.mean_difference())
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_test.mean_difference())

# Metrics for the dataset from model with debiasing
display(Markdown("#### Model - with debiasing - dataset metrics"))
metric_dataset_debiasing_train = BinaryLabelDatasetMetric(dataset_debiasing_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_train.mean_difference())

metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)

print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_test.mean_difference())



display(Markdown("#### Plain model - without debiasing - classification metrics"))
print("Test set: Classification accuracy = %f" % classified_metric_nodebiasing_test.accuracy())
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_nodebiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_nodebiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_nodebiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_nodebiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_nodebiasing_test.theil_index())



display(Markdown("#### Model - with debiasing - classification metrics"))
classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test, 
                                                 dataset_debiasing_test,
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % classified_metric_debiasing_test.accuracy())
TPR = classified_metric_debiasing_test.true_positive_rate()
TNR = classified_metric_debiasing_test.true_negative_rate()
bal_acc_debiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_debiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_debiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_debiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_debiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_debiasing_test.theil_index())

#### Plain model - without debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.210607
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.218465


#### Model - with debiasing - dataset metrics

Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.085963
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.086035


#### Plain model - without debiasing - classification metrics

Test set: Classification accuracy = 0.806183
Test set: Balanced classification accuracy = 0.664527
Test set: Disparate impact = 0.000000
Test set: Equal opportunity difference = -0.470103
Test set: Average odds difference = -0.290826
Test set: Theil_index = 0.173192


#### Model - with debiasing - classification metrics

Test set: Classification accuracy = 0.794308
Test set: Balanced classification accuracy = 0.674868
Test set: Disparate impact = 0.591627
Test set: Equal opportunity difference = -0.053238
Test set: Average odds difference = -0.034290
Test set: Theil_index = 0.166259
