In [30]:
import numpy as np
np.random.seed(0)
import pandas as pd

from tqdm import tqdm
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

from IPython.display import Markdown, display
from common_utils import compute_metrics

In [31]:
df = pd.read_csv("student-por.csv")
df.head(5)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,0,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,4,0,11,11
1,GP,0,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,2,9,11,11
2,GP,0,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,6,12,13,12
3,GP,0,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,0,14,14,14
4,GP,0,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,0,11,13,13


In [32]:
# Step 2 - Gathering Graph Data for Sex
sexCounts = df.groupby(['sex', 'Dalc']).size()
print (sexCounts)
sexCounts.to_csv('temp.csv')

sex  Dalc
0    1       305
     2        58
     3        11
     4         7
     5         2
1    1       146
     2        63
     3        32
     4        10
     5        15
dtype: int64


In [33]:
# Step 2 - Gathering Graph Data for Age
ageCounts = df.groupby(['age', 'absences']).size()
print (ageCounts)
ageCounts.to_csv('temp.csv')

age  absences
15   0           47
     1            2
     2           21
     3            1
     4           16
                 ..
20   8            3
     12           1
21   0            1
     21           1
22   12           1
Length: 80, dtype: int64


In [34]:
# Step 3 - Sex|Dalc Fairness Metric Calculation
dataset_sex_dalc = StandardDataset(df, 
                               label_name='Dalc', 
                               favorable_classes=[1,2,3],
                               protected_attribute_names=['sex'], 
                               privileged_classes=[[0]],
                               features_to_keep=['age','Walc','absences']
                              )
privileged_groups_sex = [{'sex': 0}]
unprivileged_groups_sex = [{'sex': 1}]
print(dataset_sex_dalc)

               instance weights            features                     labels
                                protected attribute                           
                                                sex   age Walc absences       
instance names                                                                
0                           1.0                 0.0  18.0  1.0      4.0    1.0
1                           1.0                 0.0  17.0  1.0      2.0    1.0
2                           1.0                 0.0  15.0  3.0      6.0    1.0
3                           1.0                 0.0  15.0  1.0      0.0    1.0
4                           1.0                 0.0  16.0  2.0      0.0    1.0
...                         ...                 ...   ...  ...      ...    ...
644                         1.0                 0.0  19.0  2.0      4.0    1.0
645                         1.0                 0.0  18.0  1.0      4.0    1.0
646                         1.0                 0.0 

In [35]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_sex_dalc, 
                                             unprivileged_groups=unprivileged_groups_sex,
                                             privileged_groups=privileged_groups_sex)
display(Markdown("#### Original dataset"))
print("Disparate Imapct of unprivileged and privileged groups = %f" % metric_orig_train.disparate_impact())
print("SPD of unprivileged and privileged groups = %f" % metric_orig_train.statistical_parity_difference())

#### Original dataset

Disparate Imapct of unprivileged and privileged groups = 0.927818
SPD of unprivileged and privileged groups = -0.070486


In [36]:
# Step 3 - Sex|Walc Fairness Metric Calculation
dataset_sex_walc = StandardDataset(df, 
                               label_name='Walc', 
                               favorable_classes=[1,2,3],
                               protected_attribute_names=['sex'], 
                               privileged_classes=[[0]],
                               features_to_keep=['age','Dalc','absences']
                              )
print(dataset_sex_walc)

               instance weights            features                     labels
                                protected attribute                           
                                                sex   age Dalc absences       
instance names                                                                
0                           1.0                 0.0  18.0  1.0      4.0    1.0
1                           1.0                 0.0  17.0  1.0      2.0    1.0
2                           1.0                 0.0  15.0  2.0      6.0    1.0
3                           1.0                 0.0  15.0  1.0      0.0    1.0
4                           1.0                 0.0  16.0  1.0      0.0    1.0
...                         ...                 ...   ...  ...      ...    ...
644                         1.0                 0.0  19.0  1.0      4.0    1.0
645                         1.0                 0.0  18.0  1.0      4.0    1.0
646                         1.0                 0.0 

In [37]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_sex_walc, 
                                             unprivileged_groups=unprivileged_groups_sex,
                                             privileged_groups=privileged_groups_sex)
display(Markdown("#### Original dataset"))
print("Disparate Imapct of unprivileged and privileged groups = %f" % metric_orig_train.disparate_impact())
print("SPD of unprivileged and privileged groups = %f" % metric_orig_train.statistical_parity_difference())

#### Original dataset

Disparate Imapct of unprivileged and privileged groups = 0.711602
SPD of unprivileged and privileged groups = -0.260537


In [38]:
# Step 3 - Age|Dalc Fairness Metric Calculation
dataset_age_dalc = StandardDataset(df, 
                               label_name='Dalc', 
                               favorable_classes=[1,2,3],
                               protected_attribute_names=['age'], 
                               privileged_classes=[lambda x: x < 18],
                               features_to_keep=['sex','Walc','absences']
                              )
privileged_groups_age = [{'age': 1}]
unprivileged_groups_age = [{'age': 0}]
print(dataset_age_dalc)

               instance weights features                                    \
                                         protected attribute                 
                                     sex                 age Walc absences   
instance names                                                               
0                           1.0      0.0                 0.0  1.0      4.0   
1                           1.0      0.0                 1.0  1.0      2.0   
2                           1.0      0.0                 1.0  3.0      6.0   
3                           1.0      0.0                 1.0  1.0      0.0   
4                           1.0      0.0                 1.0  2.0      0.0   
...                         ...      ...                 ...  ...      ...   
644                         1.0      0.0                 0.0  2.0      4.0   
645                         1.0      0.0                 0.0  1.0      4.0   
646                         1.0      0.0                 0.0  1.

In [39]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_age_dalc, 
                                             unprivileged_groups=unprivileged_groups_age,
                                             privileged_groups=privileged_groups_age)
display(Markdown("#### Original dataset"))
print("Disparate Imapct of unprivileged and privileged groups = %f" % metric_orig_train.disparate_impact())
print("SPD of unprivileged and privileged groups = %f" % metric_orig_train.statistical_parity_difference())

#### Original dataset

Disparate Imapct of unprivileged and privileged groups = 0.940231
SPD of unprivileged and privileged groups = -0.057598


In [40]:
# Step 3 - Age|Walc Fairness Metric Calculation
dataset_age_walc = StandardDataset(df, 
                               label_name='Walc', 
                               favorable_classes=[1,2,3],
                               protected_attribute_names=['age'], 
                               privileged_classes=[lambda x: x < 18],
                               features_to_keep=['sex','Dalc','absences']
                              )
print(dataset_age_walc)

               instance weights features                                    \
                                         protected attribute                 
                                     sex                 age Dalc absences   
instance names                                                               
0                           1.0      0.0                 0.0  1.0      4.0   
1                           1.0      0.0                 1.0  1.0      2.0   
2                           1.0      0.0                 1.0  2.0      6.0   
3                           1.0      0.0                 1.0  1.0      0.0   
4                           1.0      0.0                 1.0  1.0      0.0   
...                         ...      ...                 ...  ...      ...   
644                         1.0      0.0                 0.0  1.0      4.0   
645                         1.0      0.0                 0.0  1.0      4.0   
646                         1.0      0.0                 0.0  1.

In [41]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_age_walc, 
                                             unprivileged_groups=unprivileged_groups_age,
                                             privileged_groups=privileged_groups_age)
display(Markdown("#### Original dataset"))
print("Disparate Imapct of unprivileged and privileged groups = %f" % metric_orig_train.disparate_impact())
print("SPD of unprivileged and privileged groups = %f" % metric_orig_train.statistical_parity_difference())

#### Original dataset

Disparate Imapct of unprivileged and privileged groups = 0.950802
SPD of unprivileged and privileged groups = -0.039737


In [42]:
# Step 3 - Preprocessing Algorithm - Sex|Dalc Fairness Metric Calculation
RW = Reweighing(unprivileged_groups=unprivileged_groups_sex,
                privileged_groups=privileged_groups_sex)
dataset_transf_sex_dalc = RW.fit_transform(dataset_sex_dalc)

In [43]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_transf_sex_dalc, 
                                             unprivileged_groups=unprivileged_groups_sex,
                                             privileged_groups=privileged_groups_sex)
display(Markdown("#### Transformed dataset"))
print("Disparate Imapct of unprivileged and privileged groups = %f" % metric_orig_train.disparate_impact())
print("SPD of unprivileged and privileged groups = %f" % metric_orig_train.statistical_parity_difference())

#### Transformed dataset

Disparate Imapct of unprivileged and privileged groups = 1.000000
SPD of unprivileged and privileged groups = 0.000000


In [44]:
# Step 3 - Preprocessing Algorithm - Age|Dalc Fairness Metric Calculation
RW = Reweighing(unprivileged_groups=unprivileged_groups_age,
                privileged_groups=privileged_groups_age)
dataset_transf_age_dalc = RW.fit_transform(dataset_age_dalc)

In [45]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_transf_age_dalc, 
                                             unprivileged_groups=unprivileged_groups_age,
                                             privileged_groups=privileged_groups_age)
display(Markdown("#### Transformed dataset"))
print("Disparate Imapct of unprivileged and privileged groups = %f" % metric_orig_train.disparate_impact())
print("SPD of unprivileged and privileged groups = %f" % metric_orig_train.statistical_parity_difference())

#### Transformed dataset

Disparate Imapct of unprivileged and privileged groups = 1.000000
SPD of unprivileged and privileged groups = 0.000000


In [46]:
# Rename variable for Step 4 (Sex)
dataset_orig = StandardDataset(df, 
                               label_name='Dalc', 
                               favorable_classes=[1,2,3],
                               protected_attribute_names=['sex'], 
                               privileged_classes=[[0]],
                               features_to_keep=['age','Walc','absences']
                              )
privileged_groups = [{'sex': 0}]
unprivileged_groups = [{'sex': 1}]

In [47]:
# Rename variable for Step 4 (Sex)
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
dataset_transf = RW.fit_transform(dataset_orig)

In [48]:
dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.7], shuffle=True)
dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.5], shuffle=True)

In [49]:
# Logistic regression classifier and predictions
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()
w_train = dataset_orig_train.instance_weights.ravel()

lmod = LogisticRegression()
lmod.fit(X_train, y_train, 
         sample_weight=dataset_orig_train.instance_weights)
y_train_pred = lmod.predict(X_train)

# positive class index
pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

dataset_orig_train_pred = dataset_orig_train.copy()
dataset_orig_train_pred.labels = y_train_pred

In [50]:
dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
X_valid = scale_orig.transform(dataset_orig_valid_pred.features)
y_valid = dataset_orig_valid_pred.labels
dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)

dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = scale_orig.transform(dataset_orig_test_pred.features)
y_test = dataset_orig_test_pred.labels
dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

In [51]:
num_thresh = 100
ba_arr = np.zeros(num_thresh)
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
for idx, class_thresh in enumerate(class_thresh_arr):
    fav_inds = dataset_orig_valid_pred.scores > class_thresh
    dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label
    dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label
    classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,
                                             dataset_orig_valid_pred, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
    
    ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                       +classified_metric_orig_valid.true_negative_rate())
best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
best_class_thresh = class_thresh_arr[best_ind]

In [52]:
display(Markdown("#### Predictions from original testing data"))
bal_acc_arr_orig = []
disp_imp_arr_orig = []
avg_odds_diff_arr_orig = []

print("Classification threshold used = %.4f" % best_class_thresh)
for thresh in tqdm(class_thresh_arr):
    
    if thresh == best_class_thresh:
        disp = True
    else:
        disp = False
    
    fav_inds = dataset_orig_test_pred.scores > thresh
    dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label
    dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label
    
    metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                                      unprivileged_groups, privileged_groups,
                                      disp = disp)

    bal_acc_arr_orig.append(metric_test_bef["Balanced accuracy"])
    avg_odds_diff_arr_orig.append(metric_test_bef["Average odds difference"])
    disp_imp_arr_orig.append(metric_test_bef["Disparate impact"])

#### Predictions from original testing data

Classification threshold used = 0.7821


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]invalid value encountered in double_scalars
invalid value encountered in double_scalars
100%|██████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1190.50it/s]

Balanced accuracy = 0.8175
Statistical parity difference = -0.1042
Disparate impact = 0.8958
Average odds difference = nan
Equal opportunity difference = -0.0667
Theil index = 0.0351





In [53]:
dataset_transf_train, dataset_transf_vt = dataset_transf.split([0.7], shuffle=True)
dataset_transf_valid, dataset_transf_test = dataset_transf_vt.split([0.5], shuffle=True)

In [54]:
# Logistic regression classifier and predictions
scale_orig = StandardScaler()
X_train = scale_orig.fit_transform(dataset_transf_train.features)
y_train = dataset_transf_train.labels.ravel()
w_train = dataset_transf_train.instance_weights.ravel()

lmod = LogisticRegression()
lmod.fit(X_train, y_train, 
         sample_weight=dataset_transf_train.instance_weights)
y_train_pred = lmod.predict(X_train)

# positive class index
pos_ind = np.where(lmod.classes_ == dataset_transf_train.favorable_label)[0][0]

dataset_transf_train_pred = dataset_transf_train.copy()
dataset_transf_train_pred.labels = y_train_pred

In [55]:
dataset_transf_valid_pred = dataset_transf_valid.copy(deepcopy=True)
X_valid = scale_orig.transform(dataset_transf_valid_pred.features)
y_valid = dataset_transf_valid_pred.labels
dataset_transf_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)

dataset_transf_test_pred = dataset_transf_test.copy(deepcopy=True)
X_test = scale_orig.transform(dataset_transf_test_pred.features)
y_test = dataset_transf_test_pred.labels
dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

In [56]:
num_thresh = 100
ba_arr = np.zeros(num_thresh)
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
for idx, class_thresh in enumerate(class_thresh_arr):
    fav_inds = dataset_transf_valid_pred.scores > class_thresh
    dataset_transf_valid_pred.labels[fav_inds] = dataset_transf_valid_pred.favorable_label
    dataset_transf_valid_pred.labels[~fav_inds] = dataset_transf_valid_pred.unfavorable_label
    classified_metric_orig_valid = ClassificationMetric(dataset_transf_valid,
                                             dataset_transf_valid_pred, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
    
    ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                       +classified_metric_orig_valid.true_negative_rate())
best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
best_class_thresh = class_thresh_arr[best_ind]

In [57]:
display(Markdown("#### Predictions from transformed testing data"))
bal_acc_arr_orig = []
disp_imp_arr_orig = []
avg_odds_diff_arr_orig = []

print("Classification threshold used = %.4f" % best_class_thresh)
for thresh in tqdm(class_thresh_arr):
    
    if thresh == best_class_thresh:
        disp = True
    else:
        disp = False
    
    fav_inds = dataset_transf_test_pred.scores > thresh
    dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label
    dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label
    
    metric_test_bef = compute_metrics(dataset_transf_test, dataset_transf_test_pred, 
                                      unprivileged_groups, privileged_groups,
                                      disp = disp)

    bal_acc_arr_orig.append(metric_test_bef["Balanced accuracy"])
    avg_odds_diff_arr_orig.append(metric_test_bef["Average odds difference"])
    disp_imp_arr_orig.append(metric_test_bef["Disparate impact"])

#### Predictions from transformed testing data

Classification threshold used = 0.8019


  0%|                                                                                          | 0/100 [00:00<?, ?it/s]

Balanced accuracy = 0.8310

100%|██████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 1176.49it/s]


Statistical parity difference = 0.0721
Disparate impact = 1.0811
Average odds difference = 0.3569
Equal opportunity difference = 0.0471
Theil index = 0.0603



