In [1]:
import sys
import pathlib

sys.path.insert(0, str(pathlib.Path().absolute().parent))

%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
from IPython.display import display, Markdown, Latex

import model_bias
import model_bias.visualisation.plot

---

### Parameters

In [3]:
def load_data():
    """Return the Adult dataset as a de-dummified pandas DataFrame.

    The frame comes from ``AdultDataset().convert_to_dataframe`` (the metadata
    it also returns is discarded) and the ``race`` and ``sex`` columns are cast
    to the pandas ``category`` dtype.
    """
    frame, _ = AdultDataset().convert_to_dataframe(de_dummy_code=True, set_category=True)
    for column in ("race", "sex"):
        frame[column] = frame[column].astype("category")
    return frame

In [4]:
def load_model():
    """Train an XGBoost classifier on the Adult dataset and return it.

    Ideally the model would be persisted somewhere and this function would
    only retrieve it.

    Returns:
        The fitted ``XGBClassifier`` whose ``predict`` attribute has been
        replaced by a wrapper returning a list of the string labels
        ``">50K"`` / ``"<=50K"`` instead of the raw predictions.
    """
    from xgboost import XGBClassifier

    ds = AdultDataset()  # note: this is the same dataset as the one used in this report

    model = XGBClassifier()

    # Train against the labels. ``scores`` only happened to work as a target
    # because aif360 copies the labels into it when no scores are supplied.
    y = ds.labels.ravel()
    X = ds.features

    model.fit(X, y)

    raw_predict = model.predict

    def predict_label(*args, **kwargs):
        """Call the original ``predict`` and map truthy predictions to ">50K"."""
        return [">50K" if label else "<=50K" for label in raw_predict(*args, **kwargs)]

    # Instance-level override: only this model object is affected.
    model.predict = predict_label

    return model

In [5]:
# Columns of the dataset that are treated as protected attributes.
PROTECTED_ATTRIBUTES = ["race", "sex"]

# Privileged categories, paired by position with PROTECTED_ATTRIBUTES:
# PRIVILEGED_CATEGORIES[i] lists the privileged values of PROTECTED_ATTRIBUTES[i].
PRIVILEGED_CATEGORIES = [["White"], ["Male"]]

In [6]:
# TODO: allow multiple target variables.
# Name of the column holding the label, and the label value regarded as the
# favorable outcome.
TARGET_VARIABLE, PRIVILEGED_CLASS = "income-per-year", ">50K"

---

# Dataset Exploration

In [7]:
# Load the dataset once; every later cell reads from `data`.
data = load_data()
# Preview the first rows (5 is the pandas default for `head`).
data.head(n=5)



Unnamed: 0,age,education-num,race,sex,capital-gain,capital-loss,hours-per-week,income-per-year,workclass,education,marital-status,occupation,relationship,native-country
0,39.0,13.0,White,Male,2174.0,0.0,40.0,<=50K,State-gov,Bachelors,Never-married,Adm-clerical,Not-in-family,United-States
1,50.0,13.0,White,Male,0.0,0.0,13.0,<=50K,Self-emp-not-inc,Bachelors,Married-civ-spouse,Exec-managerial,Husband,United-States
2,38.0,9.0,White,Male,0.0,0.0,40.0,<=50K,Private,HS-grad,Divorced,Handlers-cleaners,Not-in-family,United-States
3,53.0,7.0,Non-white,Male,0.0,0.0,40.0,<=50K,Private,11th,Married-civ-spouse,Handlers-cleaners,Husband,United-States
4,28.0,13.0,Non-white,Female,0.0,0.0,40.0,<=50K,Private,Bachelors,Married-civ-spouse,Prof-specialty,Wife,Cuba


In [8]:
# Summarise the dataset dimensions and the configured variables.
# Backslashes are doubled so that LaTeX commands (e.g. \color) are not parsed
# as invalid Python escape sequences; the rendered text is unchanged.
protected_names = ", ".join(f"`{name}`" for name in PROTECTED_ATTRIBUTES)
display_str = (
    f"The dataset is composed of $\\color{{blue}}{{{data.shape[0]}}}$ observations and "
    f"has $\\color{{blue}}{{{data.shape[1]}}}$ columns among which $\\color{{blue}}{{{len(PROTECTED_ATTRIBUTES)}}}$ have been identified "
    f"as protected attributes. \n\n"
    f"The protected variables are {protected_names}. \n\n"
    f"The target variable is `{TARGET_VARIABLE}`."
)
display(Markdown(display_str))

The dataset is composed of $\color{blue}{45222}$ observations and has $\color{blue}{14}$ columns among which $\color{blue}{2}$ have been identified as protected attributes. 

The protected variables are `race`, `sex`. 

The target variable is `income-per-year`.

In [9]:
# Parameters validation: fail early with an explicit message when the
# configuration does not match the loaded dataset.

# The two lists are paired by position; `zip` below would silently ignore the
# extra entries if their lengths differed.
if len(PROTECTED_ATTRIBUTES) != len(PRIVILEGED_CATEGORIES):
    raise ValueError(
        f"PROTECTED_ATTRIBUTES has {len(PROTECTED_ATTRIBUTES)} entries but "
        f"PRIVILEGED_CATEGORIES has {len(PRIVILEGED_CATEGORIES)}; one list of "
        f"privileged categories is expected per protected attribute."
    )

if TARGET_VARIABLE not in data.columns:
    raise ValueError(f"The target variable {TARGET_VARIABLE} is not a column of the dataset.")
if PRIVILEGED_CLASS not in data[TARGET_VARIABLE].unique():
    raise ValueError(f"The category {PRIVILEGED_CLASS} does not exist in {TARGET_VARIABLE}.")

for protected_attr, privileged_cats in zip(PROTECTED_ATTRIBUTES, PRIVILEGED_CATEGORIES):
    if protected_attr not in data.columns:
        raise ValueError(f"The protected variable {protected_attr} is not a column of the dataset.")
    # Compute the existing categories once per attribute rather than once per check.
    existing_cats = data[protected_attr].unique()
    for privileged_cat in privileged_cats:
        if privileged_cat not in existing_cats:
            raise ValueError(f"The privileged category {privileged_cat} does not exist in {protected_attr}.")

In [10]:
from aif360.datasets import StandardDataset

# Build the aif360 dataset from a copy so that `data` itself is left untouched.
data_copy = data.copy()
# The target and the protected attributes are converted to plain strings, so
# they are no longer of `category` dtype and are therefore excluded from
# `categorical_features` below.
data_copy[TARGET_VARIABLE] = data_copy[TARGET_VARIABLE].astype(str)
data_copy[PROTECTED_ATTRIBUTES] = data_copy[PROTECTED_ATTRIBUTES].astype(str)

ds = StandardDataset(
    data_copy,
    label_name=TARGET_VARIABLE,
    # List of label values considered favorable; its length is unrelated to
    # the number of protected attributes.
    favorable_classes=[PRIVILEGED_CLASS],
    protected_attribute_names=PROTECTED_ATTRIBUTES,
    privileged_classes=PRIVILEGED_CATEGORIES,
    categorical_features=list(data_copy.select_dtypes(include=["category"]).columns)
)

## Target Overall Proportion

In [11]:
# Describe how the records are distributed over the target categories.
# Backslashes are doubled so that LaTeX commands (\color, \%) are not parsed
# as invalid Python escape sequences.
display_str = (
    "We now look at the overall proportion of each category in the dataset. \n\n"
    f"We see that we have $\\color{{blue}}{{{data[TARGET_VARIABLE].nunique()}}}$ categories: \n\n"
)

for value, counts in data[TARGET_VARIABLE].value_counts().items():
    display_str += (
        f"- the value '$\\color{{orange}}{{\\text{{{value}}}}}$' represents "
        f"$\\color{{blue}}{{{counts}}}$ records "
        f"($\\color{{blue}}{{{counts/len(data)*100:0.2f}\\%}}$)\n\n"
    )

display(Markdown(display_str))

We now look at the overall proportion of each categories in the dataset. 

We see that we have $\color{blue}{2}$ category: 

- the value '$\color{orange}{\text{<=50K}}$' represents $\color{blue}{34014}$ records ($\color{blue}{75.22\%}$)

- the value '$\color{orange}{\text{>50K}}$' represents $\color{blue}{11208}$ records ($\color{blue}{24.78\%}$)



In [12]:
# Heading first, then the count plot of the target variable.
target_heading = f"**Target Variable Name:** `{TARGET_VARIABLE}`"
display(Markdown(target_heading))
_ = model_bias.visualisation.plot.counts(TARGET_VARIABLE, data=data)

**Target Variable Name:** `income-per-year`

## Protected Attributes Proportion

We also investigate the proportion of the categories of protected attributes.

In [13]:
# For each protected attribute: list the share of every category, then plot
# the counts. Backslashes are doubled so that LaTeX commands (\color, \%) are
# not parsed as invalid Python escape sequences.
for protected_attr in PROTECTED_ATTRIBUTES:
    display_str = f"For the attribute `{protected_attr}` we have the following proportions: \n\n"
    for value, counts in data[protected_attr].value_counts().items():
        display_str += (
            f"- the value '$\\color{{orange}}{{\\text{{{value}}}}}$' represents "
            f"$\\color{{blue}}{{{counts}}}$ records "
            f"($\\color{{blue}}{{{counts/len(data)*100:0.2f}\\%}}$)\n\n"
        )
    display(Markdown(display_str))

    _ = model_bias.visualisation.plot.counts(protected_attr, data=data)
    # Empty markdown output emitted after each plot.
    display(Markdown(""))

For the attribute `race` we have the following proportions: 

- the value '$\color{orange}{\text{White}}$' represents $\color{blue}{38903}$ records ($\color{blue}{86.03\%}$)

- the value '$\color{orange}{\text{Non-white}}$' represents $\color{blue}{6319}$ records ($\color{blue}{13.97\%}$)





For the attribute `sex` we have the following proportions: 

- the value '$\color{orange}{\text{Male}}$' represents $\color{blue}{30527}$ records ($\color{blue}{67.50\%}$)

- the value '$\color{orange}{\text{Female}}$' represents $\color{blue}{14695}$ records ($\color{blue}{32.50\%}$)





## Target by Protected Attribute Proportion

From a bias perspective we are interested in the difference of proportions given the protected variable.

We plot below the proportions for the target variable given each of the protected categories:

In [14]:
# One heading and one count plot of the target per protected attribute.
target_part = f"**Target Variable Name:** `{TARGET_VARIABLE}`"
for protected_attr in PROTECTED_ATTRIBUTES:
    protected_part = f"**Protected Variable Name:** `{protected_attr}`&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"
    display_str = protected_part + target_part
    display(Markdown(display_str))
    _ = model_bias.visualisation.plot.counts(TARGET_VARIABLE, protected_attr, data=data)
    # Empty markdown output emitted after each plot.
    display(Markdown(""))

**Protected Variable Name:** `race`&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**Target Variable Name:** `income-per-year`



**Protected Variable Name:** `sex`&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;**Target Variable Name:** `income-per-year`



## Original Bias

In [15]:
# A single privileged group: one dict mapping every protected attribute name
# to the corresponding entry of `ds.privileged_protected_attributes`.
privileged_groups = [
    {
        name: values
        for name, values in zip(ds.protected_attribute_names, ds.privileged_protected_attributes)
    }
]
# One unprivileged group per protected attribute, each a single-entry dict.
unprivileged_groups = [
    dict([pair])
    for pair in zip(ds.protected_attribute_names, ds.unprivileged_protected_attributes)
]

# Dataset-level metrics computed on the original labels.
metrics = BinaryLabelDatasetMetric(
    ds,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups,
)

#### Individual Fairness

The individual fairness metric measures how similar the labels are for similar instances. It uses the K-Nearest Neighbors algorithm to find the $k$ nearest neighbors of each observation and measures how far the label of that observation is from the average label of its neighbors, according to the equation below:

\begin{equation*}
1 - \frac{1}{n}\sum_{i=1}^n \left|\hat{y}_i - \frac{1}{k}\sum_{j\in\mathcal{N}_{k}(x_i)} \hat{y}_j\right|
\end{equation*}

In [16]:
# `consistency` returns an array-like; only its first element is used here.
consistency = metrics.consistency(n_neighbors=5)[0]

# Backslashes are doubled so that LaTeX commands (\color, \%) are not parsed
# as invalid Python escape sequences. The first sentence ends with a space so
# that the two sentences are not glued together when rendered.
display_str = (
    f"We find the consistency of this dataset to be: $\\color{{blue}}{{{consistency*100:0.2f}\\%}}$. "
    f"We want this metric to be as close as possible to $\\color{{green}}{{100\\%}}$."
)
display(Markdown(display_str))

Pass n_neighbors=5 as keyword args. From version 0.25 passing these as positional arguments will result in an error


We find the consistency of this dataset to be: $\color{blue}{84.89\%}$.We want this metric to be as close as possible to $\color{green}{100\%}$.

In [17]:
# Plot the consistency score against its target value of 1.
# The 10,000-row helper DataFrame that used to be built here was never read,
# so it has been removed.
fig = model_bias.visualisation.plot.pie_proportion(consistency, target=1., title="Consistency")

#### Disparate impact

The disparate impact is the ratio of the rate of favorable outcomes for the unprivileged group to that of the privileged group.

\begin{equation*}
\frac{\mathbb{P}(Y = 1 | Z = \text{unprivileged})}
   {\mathbb{P}(Y = 1 | Z = \text{privileged})}
\end{equation*}

In [18]:
disparate_impact = metrics.disparate_impact()

# Backslashes are doubled so that LaTeX commands (\color, \%) are not parsed
# as invalid Python escape sequences.
display_str = (
    f"The disparate impact is: $\\color{{blue}}{{{disparate_impact*100:0.2f}\\%}}$. "
    f"We want this metric to be as close to $\\color{{green}}{{100\\%}}$ as possible."
)
display(Markdown(display_str))

The disparate impact is: $\color{blue}{41.66\%}$. We want this metric to be as close to $\color{green}{100\%}$ as possible.

In [19]:
# Plot the disparate impact against its target value of 1.
fig = model_bias.visualisation.plot.pie_proportion(
    disparate_impact,
    target=1.0,
    title="Disparate Impact",
)

#### Parity difference

The parity difference is the difference between the rate of favorable outcomes received by the unprivileged group and the rate received by the privileged group.

\begin{equation*}
\mathbb{P}(Y = 1 | Z = \text{unprivileged}) - \mathbb{P}(Y = 1 | Z = \text{privileged})
\end{equation*}

In [20]:
parity_difference = metrics.statistical_parity_difference()

# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence. The second sentence has also been repaired
# (it previously read "as close possible to ... as possible").
display_str = (
    f"The parity difference is: $\\color{{blue}}{{{parity_difference:0.3f}}}$. "
    f"We want this metric to be as close to $\\color{{green}}{{0}}$ as possible."
)
display(Markdown(display_str))

The parity difference is: $\color{blue}{-0.189}$. We want this metric to be as close possible to $\color{green}{0}$ as possible

In [21]:
# The difference is rescaled with (x + 1) / 2 before plotting, so that a
# difference of 0 lands on the target of 0.5; the label shows the raw value.
parity_position = (parity_difference+1)/2
parity_label = f"{parity_difference:0.3f}"
fig = model_bias.visualisation.plot.pie_proportion(
    parity_position,
    target=0.5,
    label=parity_label,
    title="Parity Difference",
)

## Conclusion on the dataset

TO BE COMPLETED BY THE USER

# Model Testing

In [22]:
# Predict on the features of the report dataset.
model = load_model()
y_hat = model.predict(ds.features)

# Same frame as the original data, with the target replaced by the predictions.
data_hat = data.copy()
data_hat[TARGET_VARIABLE] = y_hat
data_hat[TARGET_VARIABLE] = data_hat[TARGET_VARIABLE].astype(str)
data_hat[PROTECTED_ATTRIBUTES] = data_hat[PROTECTED_ATTRIBUTES].astype(str)

ds_hat = StandardDataset(
    data_hat,
    label_name=TARGET_VARIABLE,
    # List of label values considered favorable; its length is unrelated to
    # the number of protected attributes.
    favorable_classes=[PRIVILEGED_CLASS],
    protected_attribute_names=PROTECTED_ATTRIBUTES,
    privileged_classes=PRIVILEGED_CATEGORIES,
    # Derived from the frame actually passed to the dataset rather than from
    # a frame built in another cell.
    categorical_features=list(data_hat.select_dtypes(include=["category"]).columns)
)



In [23]:
# Compare the original labels (`ds`) with the model predictions (`ds_hat`)
# using the same group definitions as for the dataset metrics.
group_definitions = {
    "privileged_groups": privileged_groups,
    "unprivileged_groups": unprivileged_groups,
}
classification_metrics = ClassificationMetric(ds, ds_hat, **group_definitions)

#### True Positive Rate (TPR)

In [24]:
# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence; the typo "correclty" is also fixed.
display_str = (
    "This represents the ratio of observations that have been correctly classified as positive by the model. "
    f"That is the number of observations for which the model has correctly assigned the label '$\\color{{orange}}{{\\text{{{PRIVILEGED_CLASS}}}}}$'.\n\n"
)
display(Markdown(display_str))

This represents the ratio of observations that have been correctly classified as positive by the model. That is the number of observations for which the model has correclty assigned the label '$\color{orange}{\text{>50K}}$'.



\begin{equation*}
TPR = \frac{TP}{TP+FN}
\end{equation*}

We are interested in the difference between the TPR for the unprivileged group and the TPR for the privileged group.

\begin{equation*}
TPR_{\text{unprivileged}} - TPR_{\text{privileged}}
\end{equation*}

In [25]:
tpr_diff = classification_metrics.true_positive_rate_difference()

# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence.
display_str = (
    f"The TPR difference is $\\color{{blue}}{{{tpr_diff:0.3f}}}$. "
    f"We want this metric to be as close to $\\color{{green}}{{0}}$ as possible."
)
display(Markdown(display_str))

The TPR difference is $\color{blue}{-0.039}$. We want this metric to be as close to $\color{green}{0}$ as possible.

#### False Positive Rate (FPR)

In [26]:
# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence; the typo "incorreclty" is also fixed.
display_str = (
    "This represents the ratio of observations that have been incorrectly classified as positive by the model. "
    f"That is the number of observations for which the model has incorrectly assigned the label '$\\color{{orange}}{{\\text{{{PRIVILEGED_CLASS}}}}}$'.\n\n"
)
display(Markdown(display_str))

This represents the ratio of observations that have been incorrectly classified as positive by the model. That is the number of observations for which the model has incorreclty assigned the label '$\color{orange}{\text{>50K}}$'.



\begin{equation*}
FPR = \frac{FP}{TN+FP}
\end{equation*}

We are interested in the difference between the FPR for the unprivileged group and the FPR for the privileged group.

\begin{equation*}
FPR_{\text{unprivileged}} - FPR_{\text{privileged}}
\end{equation*}

In [27]:
fpr_diff = classification_metrics.false_positive_rate_difference()

# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence.
display_str = (
    f"The FPR difference is $\\color{{blue}}{{{fpr_diff:0.3f}}}$. "
    f"We want this metric to be as close to $\\color{{green}}{{0}}$ as possible."
)
display(Markdown(display_str))

The FPR difference is $\color{blue}{-0.060}$. We want this metric to be as close to $\color{green}{0}$ as possible.

#### False Negative Rate (FNR)

In [28]:
# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence.
display_str = (
    "This represents the ratio of observations that have been incorrectly classified as negative by the model. "
    f"That is the number of observations for which the model should have assigned the label '$\\color{{orange}}{{\\text{{{PRIVILEGED_CLASS}}}}}$' but hasn't.\n\n"
)
display(Markdown(display_str))

This represents the ratio of observations that have been incorrectly classified as negative by the model. That is the number of observations for which the model should have assigned the label '$\color{orange}{\text{>50K}}$' but hasn't.



\begin{equation*}
FNR = \frac{FN}{TP+FN}
\end{equation*}

We are interested in the difference between the FNR for the unprivileged group and the FNR for the privileged group.

\begin{equation*}
FNR_{\text{unprivileged}} - FNR_{\text{privileged}}
\end{equation*}

In [29]:
fnr_diff = classification_metrics.false_negative_rate_difference()

# Backslashes are doubled so that the LaTeX command \color is not parsed as an
# invalid Python escape sequence.
display_str = (
    f"The FNR difference is $\\color{{blue}}{{{fnr_diff:0.3f}}}$. "
    f"We want this metric to be as close to $\\color{{green}}{{0}}$ as possible."
)
display(Markdown(display_str))

The FNR difference is $\color{blue}{0.039}$. We want this metric to be as close to $\color{green}{0}$ as possible.

In [30]:
def _rate_difference_figure(difference, title):
    """Build the (not yet shown) pie figure for one rate difference.

    Args:
        difference: the rate difference to plot; it is rescaled with
            ``(difference + 1) / 2`` so that 0 lands on the target of 0.5.
        title: title of the figure.

    Returns:
        The figure returned by ``pie_proportion``, with its ranges overridden
        and its width divided by three so that three figures fit on one row.
    """
    plot = model_bias.visualisation.plot
    figure = plot.pie_proportion(
        (difference+1)/2,
        target=0.5,
        label=f"{difference:0.3f}",
        title=title,
        show=False
    )
    figure.x_range = plot.Range1d(-0.25, 0.25)
    figure.y_range = plot.Range1d(0., 2.)
    figure.plot_width //= 3
    return figure


# The three figures share exactly the same construction; only the plotted
# value and the title differ.
tpr_fig = _rate_difference_figure(tpr_diff, "TPR difference")
fpr_fig = _rate_difference_figure(fpr_diff, "FPR difference")
fnr_fig = _rate_difference_figure(fnr_diff, "FNR difference")

# Show the three figures side by side on a single row.
layout = model_bias.visualisation.plot.gridplot([[tpr_fig, fpr_fig, fnr_fig]], merge_tools=False)
model_bias.visualisation.plot.display(layout)