In [1]:
#basics
import pandas as pd
import numpy as np
from numpy import mean
# Graphs libraries
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
plt.style.use('seaborn-white')
import seaborn as sns
from IPython.display import Markdown, display
#import plotly.offline as py
#py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.figure_factory as ff
from plotly import tools

from collections import Counter
from tqdm import tqdm
#Fairlearn and imblearn 
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as imbpipeline
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
#scklearn 
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import TruncatedSVD
#AIF360
from aif360.datasets import StandardDataset
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.sklearn.metrics import equal_opportunity_difference
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.preprocessing.lfr import LFR
from aif360.algorithms.preprocessing import DisparateImpactRemover

In [2]:
from collections import OrderedDict
from aif360.metrics import ClassificationMetric
def compute_metrics(dataset_true, dataset_pred, 
                    unprivileged_groups, privileged_groups,
                    disp = True):
    """ Compute the key metrics """
    classified_metric_pred = ClassificationMetric(dataset_true,
                                                 dataset_pred, 
                                                 unprivileged_groups=unprivileged_groups,
                                                 privileged_groups=privileged_groups)
    metrics = OrderedDict()
    metrics["Accuracy"] = classified_metric_pred.accuracy()
    metrics["Balanced accuracy"] = 0.5*(classified_metric_pred.true_positive_rate()+
                                             classified_metric_pred.true_negative_rate())
    metrics["Average odds difference"] = classified_metric_pred.average_odds_difference()
    metrics["Average absolute odds difference"] = classified_metric_pred.average_abs_odds_difference()
    metrics["Disparate impact"] = classified_metric_pred.disparate_impact()
    metrics["Statistical parity difference (mean difference)"] = classified_metric_pred.statistical_parity_difference()
    metrics["Equal opportunity difference"] = classified_metric_pred.equal_opportunity_difference()
    metrics["Theil index"] = classified_metric_pred.theil_index()
    metrics["Consistency"] = classified_metric_pred.consistency()
    ##ADD MORE METRICS FROM CLASSIFICATIONMETRIC HERE
    if disp:
        for k in metrics:
            print("%s = %.4f" % (k, metrics[k]))
    
    return metrics

In [3]:
# define dataset
data = pd.read_csv('dataset_ready2.csv') 
data = data.set_index('ORG_org_uuid')

#More cleaning
data = data.drop(['Unnamed: 0'], axis = 1)
data_orig = data.copy()
#data = data.drop(data[data.unknown_founders != 0].index)
data = data.query('unknown_founders == 0 and female_founders != 0 or male_founders != 0')

data = data.drop(data[data.total_num_founders > 10].index)
data = data.drop(data[data.status == 'operating'].index)
#data.info()

In [4]:
cols_to_drop = ['company_name', 'category_list', 'category_group_list',
               'region', 'city', 'homepage_url', 'founded_on', 'male_founders', 'female_founders'
               ,'unknown_founders']
data.drop(columns = cols_to_drop, inplace = True)
#data_bef_scale = data.copy()

In [5]:
## CREATE A BINARY OUTCOME VARIABLE
#y_variable_bin = y_variable.replace('operating', 1, regex = True)
data.replace('ipo', 1, regex = True, inplace = True)
data.replace('acquired', 1, regex = True, inplace = True)
data.replace('closed', 0, regex = True, inplace = True)
Counter(data.status)

Counter({0: 5038, 1: 13429})

### Transform to AIF360 compatible data 

In [6]:
privileged_groups = [{'mostly_male_founders': 1}]
unprivileged_groups = [{'mostly_male_founders': 0}]


dataset_orig = StandardDataset(data, 
                          label_name='status', 
                          favorable_classes=[1], 
                          protected_attribute_names=['mostly_male_founders'], 
                          privileged_classes=[[1]])

### Split into train, validation and test

In [7]:
dataset_orig_train, dataset_orig_vt = dataset_orig.split([0.75], shuffle=True, seed = 10)
dataset_orig_valid, dataset_orig_test = dataset_orig_vt.split([0.2], shuffle=True, seed = 10)


### Metric for the original training data

In [10]:
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes = %f" % metric_orig_train.mean_difference())
print("Disparate Impact = %f" %metric_orig_train.disparate_impact())

metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original test dataset"))
print("Difference in mean outcomes = %f" % metric_orig_test.mean_difference())
print("Disparate Impact = %f" %metric_orig_test.disparate_impact())

#### Original training dataset

Difference in mean outcomes = -0.074371
Disparate Impact = 0.898739


#### Original test dataset

Difference in mean outcomes = -0.076516
Disparate Impact = 0.896921


In [11]:
learning = LFR(unprivileged_groups = unprivileged_groups, privileged_groups = privileged_groups)
fit_learn = learning.fit(dataset_orig_train)
dataset_orig_train = learning.transform(dataset_orig_train)

In [12]:
# Logistic regression classifier and predictions
scale_orig = MinMaxScaler()
X_train = scale_orig.fit_transform(dataset_orig_train.features)
y_train = dataset_orig_train.labels.ravel()
w_train = dataset_orig_train.instance_weights.ravel()

lmod = LogisticRegression(solver='liblinear')
lmod.fit(X_train, y_train, 
         sample_weight=dataset_orig_train.instance_weights)
y_train_pred = lmod.predict(X_train)

# positive class index
pos_ind = np.where(lmod.classes_ == dataset_orig_train.favorable_label)[0][0]

dataset_orig_train_pred = dataset_orig_train.copy()
dataset_orig_train_pred.labels = y_train_pred

In [13]:
dataset_orig_valid_pred = dataset_orig_valid.copy(deepcopy=True)
X_valid = scale_orig.transform(dataset_orig_valid_pred.features)
y_valid = dataset_orig_valid_pred.labels
dataset_orig_valid_pred.scores = lmod.predict_proba(X_valid)[:,pos_ind].reshape(-1,1)

dataset_orig_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = scale_orig.transform(dataset_orig_test_pred.features)
y_test = dataset_orig_test_pred.labels
dataset_orig_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

In [14]:

### Scores for original validation and test sets 
num_thresh = 100
ba_arr = np.zeros(num_thresh)
class_thresh_arr = np.linspace(0.01, 0.99, num_thresh)
for idx, class_thresh in enumerate(class_thresh_arr):
    
    fav_inds = dataset_orig_valid_pred.scores > class_thresh
    dataset_orig_valid_pred.labels[fav_inds] = dataset_orig_valid_pred.favorable_label
    dataset_orig_valid_pred.labels[~fav_inds] = dataset_orig_valid_pred.unfavorable_label
    
    classified_metric_orig_valid = ClassificationMetric(dataset_orig_valid,
                                             dataset_orig_valid_pred, 
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
    
    ba_arr[idx] = 0.5*(classified_metric_orig_valid.true_positive_rate()\
                       +classified_metric_orig_valid.true_negative_rate())

best_ind = np.where(ba_arr == np.max(ba_arr))[0][0]
best_class_thresh = class_thresh_arr[best_ind]

print("Best balanced accuracy (no reweighing) = %.4f" % np.max(ba_arr))
print("Optimal classification threshold (no reweighing) = %.4f" % best_class_thresh)

Best balanced accuracy (no reweighing) = 0.4966
Optimal classification threshold (no reweighing) = 0.0793


In [15]:
display(Markdown("#### Predictions from original testing data"))
print("Classification threshold used = %.4f" % best_class_thresh)
for thresh in tqdm(class_thresh_arr):
    
    if thresh == best_class_thresh:
        disp = True
    else:
        disp = False
    
    fav_inds = dataset_orig_test_pred.scores > thresh
    dataset_orig_test_pred.labels[fav_inds] = dataset_orig_test_pred.favorable_label
    dataset_orig_test_pred.labels[~fav_inds] = dataset_orig_test_pred.unfavorable_label
    
    metric_test_bef = compute_metrics(dataset_orig_test, dataset_orig_test_pred, 
                                      unprivileged_groups, privileged_groups,
                                      disp = disp)



#### Predictions from original testing data

Classification threshold used = 0.0793


  9%|████                                         | 9/100 [00:01<00:13,  6.76it/s]

Accuracy = 0.5628
Balanced accuracy = 0.4873
Average odds difference = 0.2393
Average absolute odds difference = 0.2393
Disparate impact = 1.3818
Statistical parity difference (mean difference) = 0.2407
Equal opportunity difference = 0.2396
Theil index = 0.3521
Consistency = 0.7434


100%|███████████████████████████████████████████| 100/100 [00:14<00:00,  7.08it/s]


## Train with and transform the original training data - Reweighing 
"Reweighing won't change the training data at all, rather it will learn new weights for each training row that will make the mean difference in outcomes between the specified groups 0."
https://www.kaggle.com/garethjns/titanicsexism-fairness-in-ml

In [16]:
RW = Reweighing(unprivileged_groups=unprivileged_groups,
               privileged_groups=privileged_groups)
RW.fit(dataset_orig_train)
dataset_transf_train = RW.transform(dataset_orig_train)

### Metric with the reweighed training data 

In [17]:
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train, 
                                         unprivileged_groups=unprivileged_groups,
                                         privileged_groups=privileged_groups)
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_transf_train.mean_difference())
assert np.abs(metric_transf_train.mean_difference()) < 1e-6


#### Transformed training dataset

Difference in mean outcomes between unprivileged and privileged groups = -0.000000


## Train classifier on transformed data

In [18]:
scale_transf = StandardScaler()
X_train = scale_transf.fit_transform(dataset_transf_train.features)
y_train = dataset_transf_train.labels.ravel()

lmod = LogisticRegression(solver='liblinear')
lmod.fit(X_train, y_train,
        sample_weight=dataset_transf_train.instance_weights)
y_train_pred = lmod.predict(X_train)

### Obtain scores for transformed test set 

In [19]:
dataset_transf_test_pred = dataset_orig_test.copy(deepcopy=True)
X_test = scale_transf.fit_transform(dataset_transf_test_pred.features)


y_test = dataset_transf_test_pred.labels
dataset_transf_test_pred.scores = lmod.predict_proba(X_test)[:,pos_ind].reshape(-1,1)

## Predictions from the transformed test set at the optimal classification threshold


In [20]:

print("Classification threshold used = %.4f" % best_class_thresh)
for thresh in tqdm(class_thresh_arr):
    
    if thresh == best_class_thresh:
        disp = True
    else:
        disp = False
    
    fav_inds = dataset_transf_test_pred.scores > thresh
    dataset_transf_test_pred.labels[fav_inds] = dataset_transf_test_pred.favorable_label
    dataset_transf_test_pred.labels[~fav_inds] = dataset_transf_test_pred.unfavorable_label
    
    metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_pred, 
                                      unprivileged_groups, privileged_groups,
                                      disp = disp)

Classification threshold used = 0.0793


  9%|████                                         | 9/100 [00:01<00:15,  5.99it/s]

Accuracy = 0.7225
Balanced accuracy = 0.5784
Average odds difference = 0.0199
Average absolute odds difference = 0.0203
Disparate impact = 1.0009
Statistical parity difference (mean difference) = 0.0007
Equal opportunity difference = -0.0004
Theil index = 0.1378
Consistency = 0.7434


100%|███████████████████████████████████████████| 100/100 [00:14<00:00,  7.09it/s]
