In [1]:
# coding: utf-8

# Import Packages

import tensorflow as tf
import numpy as np
import pandas as pd

from utils import datasets
from metric import DatasetMetric
from metric import ClassificationMetric
from algorithm.adversarial_debiasing import AdversarialDebiasing

from IPython.display import Markdown, display

# Render a heading followed by the installed TensorFlow version, both as Markdown.
for _text in ('#### Tensorflow Version', tf.__version__):
    display(Markdown(_text))

#### Tensorflow Version

1.15.0

## 1. Dataset

### 1-1. Load & Preprocessing

In [2]:
# Column configuration: the protected attributes, the classes treated as
# privileged for each of them, the label column, and the columns handed to
# preprocessing as `one_hot_column_names`.
# NOTE(review): the original comment counted 18 columns (2 protected
# attributes and 1 label included), while the describe output reports a
# 19-column frame — confirm the count.
protected_attribute_names = ['sex', 'race']
privileged_classes = [['Male'], ['White']]
label_name = 'income-per-year'
one_hot_features = ['Age (decade)', 'Education Years']

# Load the full Adult dataset and preprocess it in one expression.
# The fifth argument lists label values — presumably the favorable classes;
# verify against datasets.preprocess_df.
df_orig = datasets.preprocess_df(
    datasets.get_adults_df(),
    protected_attribute_names,
    privileged_classes,
    label_name,
    ['>50K', '>50K.'],
    one_hot_column_names=one_hot_features,
)

display(Markdown('#### Basic Statistics of DataFrame'))
datasets.describe_df(df_orig, detail=True)

Missing Data: 0 rows removed.


#### Basic Statistics of DataFrame

Shape: (48842, 19)
               race           sex  Age (decade)=10  Age (decade)=20  \
count  48842.000000  48842.000000     48842.000000     48842.000000   
mean       0.855043      0.668482         0.051390         0.245793   
std        0.352061      0.470764         0.220795         0.430561   
min        0.000000      0.000000         0.000000         0.000000   
25%        1.000000      0.000000         0.000000         0.000000   
50%        1.000000      1.000000         0.000000         0.000000   
75%        1.000000      1.000000         0.000000         0.000000   
max        1.000000      1.000000         1.000000         1.000000   

       Age (decade)=30  Age (decade)=40  Age (decade)=50  Age (decade)=60  \
count     48842.000000     48842.000000     48842.000000     48842.000000   
mean          0.264711         0.219565         0.135519         0.062528   
std           0.441184         0.413956         0.342280         0.242115   
min           0.000000         0.

### 1-2. Train Test Splitting

In [3]:
# Split the preprocessed frame into train and test partitions.
# The shapes printed below (34189 and 14653 rows out of 48842) match a 7:3 split.
df_orig_train, df_orig_test = datasets.split_df(df_orig)

# Describe each partition under its own Markdown heading.
display(Markdown('#### Trainset'))
datasets.describe_df(df_orig_train)
display(Markdown('#### Testset'))
datasets.describe_df(df_orig_test)

#### Trainset

Shape: (34189, 19)


#### Testset

Shape: (14653, 19)


### 1-3. Fairness Metrics on the Original Data

In [4]:
# Metrics computed on the dataset itself (no model involved), using 'sex'
# as the protected attribute: one metric object per partition.
dataset_metric_train_without_debias, dataset_metric_test_without_debias = [
    DatasetMetric(partition, 'sex', label_name)
    for partition in (df_orig_train, df_orig_test)
]

In [5]:
display(Markdown('#### Original Dataset Metric'))

# Base rate of each group, printed for the train and test partitions.
display(Markdown('#### - Base Rate'))
display(Markdown('Unprivileged, Privileged Group 에서 각각 Positive가 차지하는 비율'))
_base_rate_rows = (
    ('Train Set, Unprivileged Group: %f', dataset_metric_train_without_debias, False),
    (' Test Set, Unprivileged Group: %f', dataset_metric_test_without_debias, False),
    ('Train Set, Privileged Group: %f', dataset_metric_train_without_debias, True),
    (' Test Set, Privileged Group: %f', dataset_metric_test_without_debias, True),
)
for _template, _metric, _is_privileged in _base_rate_rows:
    print(_template % _metric.base_rate(privileged=_is_privileged))

# Mean difference between the two groups, again per partition.
display(Markdown('#### - Mean Difference'))
display(Markdown('Unprivileged, Privileged Group 간 Base Rate의 차'))
_mean_difference_rows = (
    ('Train Set: Mean Difference between Unprivileged and Privileged Group = %f', dataset_metric_train_without_debias),
    (' Test Set: Mean Difference between Unprivileged and Privileged Group = %f', dataset_metric_test_without_debias),
)
for _template, _metric in _mean_difference_rows:
    print(_template % _metric.mean_difference())

#### Original Dataset Metric

#### - Base Rate

Unprivileged, Privileged Group 에서 각각 Positive가 차지하는 비율

Train Set, Unprivileged Group: 0.107304
 Test Set, Unprivileged Group: 0.113743
Train Set, Privileged Group: 0.304927
 Test Set, Privileged Group: 0.301046


#### - Mean Difference

Unprivileged, Privileged Group 간 Base Rate의 차

Train Set: Mean Difference between Unprivileged and Privileged Group = -0.197623
 Test Set: Mean Difference between Unprivileged and Privileged Group = -0.187302


## 2. Learn Classifier with Adversarial Debiasing

### 2-1. Model without debiasing

In [6]:
# Plain model: the same classifier class, built with debiasing switched off.
sess = tf.Session()

# Group definitions on the 'sex' attribute (1 = privileged, 0 = unprivileged).
privileged_groups = [dict(sex=1)]
unprivileged_groups = [dict(sex=0)]

plain_model = AdversarialDebiasing(
    unprivileged_groups,
    privileged_groups,
    'plain_classifier',
    sess,
    debias=False,
)

In [7]:
# Train the plain classifier on the training partition.
# NOTE(review): no random seed is set anywhere in this notebook; unless the
# project classes seed internally, losses and metrics will vary between runs — confirm.
plain_model.fit(df_orig_train, protected_attribute_names, label_name)




The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where





epoch 0; iter: 0; batch classifier loss: 0.693265
epoch 0; iter: 200; batch classifier loss: 0.421329
epoch 1; iter: 0; batch classifier loss: 0.423698
epoch 1; iter: 200; batch classifier loss: 0.480042
epoch 2; iter: 0; batch classifier loss: 0.420901
epoch 2; iter: 200; batch classifier loss: 0.521556
epoch 3; iter: 0; batch classifier loss: 0.427451
epoch 3; iter: 200; batch classifier loss: 0.352460
epoch 4; it

<algorithm.adversarial_debiasing.AdversarialDebiasing at 0x7faffa7e33d0>

In [8]:
# Apply the trained plain model to both partitions (train first, then test).
df_pred_train_nodebiased, df_pred_test_nodebiased = [
    plain_model.predict(partition, label_name)
    for partition in (df_orig_train, df_orig_test)
]

### 2-2. Model with debiasing

In [9]:
# Debiased model: the same classifier class, built with debias=True.
# Close the session the plain model was built on and reset the default graph,
# then open a fresh session for the new model.
# NOTE(review): plain_model was constructed with the session that is closed
# here, so presumably it cannot be used for prediction after this cell — confirm.
sess.close()
tf.reset_default_graph()
sess = tf.Session()

debiased_model = AdversarialDebiasing(unprivileged_groups, privileged_groups, 'debiased_classifier', sess, debias=True)

In [10]:
# Train the debiased classifier on the same training partition and with the
# same arguments as the plain model.
# NOTE(review): no random seed is set in this notebook, so the logged losses
# are presumably not reproducible run to run — confirm.
debiased_model.fit(df_orig_train, protected_attribute_names, label_name)

epoch 0; iter: 0; batch classifier loss: 0.729280; batch adversarial loss: 0.712491
epoch 0; iter: 200; batch classifier loss: 0.475521; batch adversarial loss: 0.663415
epoch 1; iter: 0; batch classifier loss: 0.500114; batch adversarial loss: 0.660279
epoch 1; iter: 200; batch classifier loss: 0.571726; batch adversarial loss: 0.641949
epoch 2; iter: 0; batch classifier loss: 0.479941; batch adversarial loss: 0.643456
epoch 2; iter: 200; batch classifier loss: 0.390424; batch adversarial loss: 0.598190
epoch 3; iter: 0; batch classifier loss: 0.482570; batch adversarial loss: 0.656016
epoch 3; iter: 200; batch classifier loss: 0.511787; batch adversarial loss: 0.584736
epoch 4; iter: 0; batch classifier loss: 0.437538; batch adversarial loss: 0.643871
epoch 4; iter: 200; batch classifier loss: 0.434313; batch adversarial loss: 0.593502
epoch 5; iter: 0; batch classifier loss: 0.458726; batch adversarial loss: 0.636523
epoch 5; iter: 200; batch classifier loss: 0.495993; batch adversa

epoch 48; iter: 0; batch classifier loss: 0.360662; batch adversarial loss: 0.586437
epoch 48; iter: 200; batch classifier loss: 0.434596; batch adversarial loss: 0.598391
epoch 49; iter: 0; batch classifier loss: 0.420739; batch adversarial loss: 0.603805
epoch 49; iter: 200; batch classifier loss: 0.416696; batch adversarial loss: 0.609185


<algorithm.adversarial_debiasing.AdversarialDebiasing at 0x7fb0743848d0>

In [11]:
# Apply the trained debiased model to both partitions (train first, then test).
df_pred_train_debiased, df_pred_test_debiased = [
    debiased_model.predict(partition, label_name)
    for partition in (df_orig_train, df_orig_test)
]

## 3. Fairness Metrics

In [12]:
# Build the metric objects used to compare the data and the models before
# and after debiasing. 'sex' is the protected attribute throughout.

# Without debiasing: dataset metrics on the original partitions, and
# classification metrics of the plain model's predictions against them.
dataset_metric_train_without_debias, dataset_metric_test_without_debias = [
    DatasetMetric(partition, 'sex', label_name)
    for partition in (df_orig_train, df_orig_test)
]
classified_metric_train_without_debias, classified_metric_test_without_debias = [
    ClassificationMetric(truth, predicted, 'sex', label_name)
    for truth, predicted in (
        (df_orig_train, df_pred_train_nodebiased),
        (df_orig_test, df_pred_test_nodebiased),
    )
]

# With debiasing: dataset metrics are computed on the debiased model's
# predictions, and classification metrics compare those predictions with the
# original partitions.
dataset_metric_train_with_debias, dataset_metric_test_with_debias = [
    DatasetMetric(predicted, 'sex', label_name)
    for predicted in (df_pred_train_debiased, df_pred_test_debiased)
]
classified_metric_train_with_debias, classified_metric_test_with_debias = [
    ClassificationMetric(truth, predicted, 'sex', label_name)
    for truth, predicted in (
        (df_orig_train, df_pred_train_debiased),
        (df_orig_test, df_pred_test_debiased),
    )
]

In [13]:
def explain_metric(met):
    """Print a fixed set of classification and fairness metrics, one per line.

    Args:
        met: Object exposing the zero-argument methods ``accuracy``,
            ``balanced_accuracy``, ``disparate_impact``,
            ``equal_opportunity_difference``, ``average_odds_difference``
            and ``theil_index`` (in this notebook, a ``ClassificationMetric``).

    Returns:
        None. Each metric is written to stdout as ``<label> <value>``.
    """
    # (label, bound method) pairs in the order they are reported.
    # The 'Balanced Accuracy' label was previously misspelled 'Balanced Accuray'.
    report = (
        ('Accuracy: ', met.accuracy),
        ('Balanced Accuracy: ', met.balanced_accuracy),
        ('Disparate Impact: ', met.disparate_impact),
        ('Equal Opportunity Difference: ', met.equal_opportunity_difference),
        ('Average Odds Difference: ', met.average_odds_difference),
        ('Theil Index: ', met.theil_index),
    )
    for label, compute in report:
        print(label, compute())

In [14]:
# Mean difference of the original data versus the debiased model's predictions
# (the "Debiased Dataset" metrics were built from df_pred_*_debiased).
_mean_difference_sections = (
    ('#### Dataset Metric - Original Dataset',
     dataset_metric_train_without_debias, dataset_metric_test_without_debias),
    ('#### Dataset Metric - Debiased Dataset',
     dataset_metric_train_with_debias, dataset_metric_test_with_debias),
)
for _heading, _train_metric, _test_metric in _mean_difference_sections:
    display(Markdown(_heading))
    print('Train Set: Mean Difference between Unprivileged and Privileged Group = %f' % _train_metric.mean_difference())
    print(' Test Set: Mean Difference between Unprivileged and Privileged Group = %f' % _test_metric.mean_difference())

# Classification metrics on the test partition: plain model first, then debiased.
_classification_sections = (
    ('#### Classification Metric - Plain Model - Test Dataset', classified_metric_test_without_debias),
    ('#### Classification Metric - Debiased Model - Test Dataset', classified_metric_test_with_debias),
)
for _heading, _metric in _classification_sections:
    display(Markdown(_heading))
    explain_metric(_metric)

#### Dataset Metric - Original Dataset

Train Set: Mean Difference between Unprivileged and Privileged Group = -0.197623
 Test Set: Mean Difference between Unprivileged and Privileged Group = -0.187302


#### Dataset Metric - Debiased Dataset

Train Set: Mean Difference between Unprivileged and Privileged Group = -0.086453
 Test Set: Mean Difference between Unprivileged and Privileged Group = -0.097511


#### Classification Metric - Plain Model - Test Dataset

Accuracy:  0.8028390090766396
Balanced Accuray:  0.6698333445585645
Disparate Impact:  0.0
Equal Opportunity Difference:  -0.4943820224719101
Average Odds Difference:  -0.3092968918636131
Theil Index:  0.17231994300259793


#### Classification Metric - Debiased Model - Test Dataset

Accuracy:  0.7961509588480175
Balanced Accuray:  0.6761564882032738
Disparate Impact:  0.5379725700573138
Equal Opportunity Difference:  -0.08297344167676807
Average Odds Difference:  -0.0535387400338686
Theil Index:  0.16794294077661986


References:

1. B. H. Zhang, B. Lemoine, and M. Mitchell, "Mitigating Unwanted Biases with Adversarial Learning," 
AAAI/ACM Conference on Artificial Intelligence, Ethics, and Society, 2018.

2. https://github.com/Trusted-AI/AIF360/blob/master/examples/demo_adversarial_debiasing.ipynb