# Disparate Mistreatment Remover Example

Examples and explanations of the function parameters of the disparate mistreatment remover (see [fair-classification/disparate_mistreatment](https://github.com/mbilalzafar/fair-classification/tree/master/disparate_mistreatment)).


In [1]:
import sys
import numpy as np
sys.path.append("../")

from aif360.datasets import GermanDataset
from aif360.metrics import ClassificationMetric
from fairensics.data.synthetic_dataset import SyntheticDataset
from fairensics.data.decision_boundary import DecisionBoundary 
from fairensics.methods import DisparateMistreatment
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
# helper function to print evaluations
def print_evaluation(dataset, clf, unprivileged_groups, privileged_groups):
    """Print the disparate impact ("p-rule") and the accuracy of a classifier.

    The classifier's predictions on ``dataset`` are compared against
    ``dataset`` itself through a ``ClassificationMetric``.

    Args:
        dataset: dataset handed to ``clf.predict`` and used as the reference
            for the metric.
        clf: object exposing ``predict(dataset)``.
        unprivileged_groups: forwarded to ``ClassificationMetric`` as
            ``unprivileged_groups``.
        privileged_groups: forwarded to ``ClassificationMetric`` as
            ``privileged_groups``.

    Returns:
        None. The two values are written to stdout.
    """
    evaluation = ClassificationMetric(
        dataset,
        clf.predict(dataset),
        unprivileged_groups=unprivileged_groups,
        privileged_groups=privileged_groups,
    )

    # Each value is computed right before it is printed, in this order.
    report = (
        ("p-rule: \t", evaluation.disparate_impact),
        ("accuracy: \t", evaluation.accuracy),
    )
    for label, compute in report:
        print(label, compute())
    

# 0.1 Load data

In [3]:
# Fixed seed for the shuffled split, so the train/test partition (and the
# numbers printed further down) are reproducible under Restart & Run All.
SPLIT_SEED = 42

gcd = GermanDataset()

# Group definitions on the protected attribute 'sex', passed to the metric.
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]

# Shuffle, then cut at the 0.7 fraction: first part is train, remainder is test.
gcd_train, gcd_test = gcd.split([0.7], shuffle=True, seed=SPLIT_SEED)

# 0.2  Metric
- Disparate mistreatment/equalized odds

The error rates must be balanced across the groups defined by the protected feature.

## 0.2.1 Definition
- $z$ the protected features
- $X$ unprotected features
- $\hat{y}$ the predicted label
- $\hat{y}=1$ the positive label  (e.g. getting a loan)
- $P_i(\hat{y}=1)$ the probability for members of group _i_ to be assigned the positive label.

There are four types of error rates that may be balanced:

- false positive rate
- false negative rate
- false omission rate
- false discovery rate


## 0.2.2 Method
- Decision boundary covariance:

$$\mathrm{cov}(g(y, x), z) \approx \frac{1}{N} \sum_{i=1}^{N} (z_i - \bar{z})\, g(y_i, x_i)$$

- $g(y, x)$ now depends on the true outcome $y$ in addition to the features $x$
- Solver: CVXPY and DCCP


# 0.3 Parameters
**- loss_function [string]**
- 'logreg' (default)
- 'svm_linear'

- [logreg_2]

**- constraint_type [string]**
 - None (default)
 - "all"
 - "fpr"
 - "fnr"
 - "fprfnr"

**- sensitive_attrs_to_cov_thresh [dict], optional, used**
- covariance threshold for each sensitive attribute {'attribute1_name': thresh_1, ...}

**- take_initial_sol [bool]**
- if true, the DCCP solver uses the initial solution of the unconstrained solver as starting point

**- Solver related Parameters**
 - tau=0.5
 - mu=1.2
 - EPS=1e-6

In [4]:
# Baseline run: no constraint_type is passed, so the default (None) applies.
dspmis = DisparateMistreatment(warn=False)
print("DisparateMistreatment without constraint")
dspmis.fit(gcd_train)
print_evaluation(
    dataset=gcd_test,
    clf=dspmis,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

DisparateMistreatment without constraint
p-rule: 	 0.9574720210664911
accuracy: 	 0.7266666666666667


In [5]:
# Same pipeline as the unconstrained run, now with the combined ("all") constraint.
dspmis = DisparateMistreatment(constraint_type="all", warn=False)
print("DisparateMistreatment with combined constraint")
dspmis.fit(gcd_train)
print_evaluation(
    dataset=gcd_test,
    clf=dspmis,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

DisparateMistreatment with combined constraint
p-rule: 	 0.9574720210664911
accuracy: 	 0.7266666666666667




In [6]:
# Train on the training split with constraint_type="fpr", evaluate on the test split.
dspmis = DisparateMistreatment(constraint_type="fpr", warn=False)
print("DisparateMistreatment with fpr constraint")
dspmis.fit(gcd_train)
print_evaluation(
    dataset=gcd_test,
    clf=dspmis,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

DisparateMistreatment with fpr constraint
p-rule: 	 0.9574720210664911
accuracy: 	 0.7266666666666667




In [7]:
# Train on the training split with constraint_type="fnr", evaluate on the test split.
dspmis = DisparateMistreatment(constraint_type="fnr", warn=False)
print("DisparateMistreatment with fnr constraint")
dspmis.fit(gcd_train)
print_evaluation(
    dataset=gcd_test,
    clf=dspmis,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

DisparateMistreatment with fnr constraint
p-rule: 	 0.9574720210664911
accuracy: 	 0.7266666666666667




In [8]:
# Train on the training split with constraint_type="fprfnr", evaluate on the test split.
dspmis = DisparateMistreatment(constraint_type="fprfnr", warn=False)
# The label names the constraint actually used, so this run's output can be
# told apart from the "fnr" run.
print("DisparateMistreatment with fprfnr constraint")
dspmis.fit(gcd_train)
print_evaluation(gcd_test, dspmis, unprivileged_groups, privileged_groups)

DisparateMistreatment with fnr constraint
p-rule: 	 0.9574720210664911
accuracy: 	 0.7266666666666667


