In [None]:
# Show the current working directory before the next cell changes it.
%pwd

In [None]:
# Move two directory levels up.
# NOTE(review): presumably this makes the repository root the working
# directory so that `cdl_python` can be imported further down - confirm.
# The path is relative, so re-running this cell moves up two more levels.
%cd ../..

In [None]:
import sys

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Markdown, display

# Load in datasets from aif360
from aif360.datasets import MEPSDataset19
from aif360.datasets import MEPSDataset20
from aif360.datasets import MEPSDataset21

# Load in the fairness metrics from aif360
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric

# Load in the explainers
from aif360.explainers import MetricTextExplainer

# Scalers
from sklearn.preprocessing import StandardScaler

# Load in bias mitigation techniques
from aif360.algorithms.preprocessing import Reweighing
from aif360.algorithms.inprocessing import PrejudiceRemover

# Load in models from sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

# Load data and create splits for training, validation and testing

In [None]:
# Seed for the shuffled split below. Without it every kernel restart yields
# a different train/validation/test partition, so none of the numbers that
# follow can be reproduced.
SPLIT_SEED = 42

# `[0.5, 0.8]` are the cut points handed to `split`, giving three consecutive
# parts (presumably 50% / 30% / 20% of the rows - confirm against the AIF360
# documentation).
(
    dataset_orig_panel19_train,
    dataset_orig_panel19_val,
    dataset_orig_panel19_test
) = MEPSDataset19().split([0.5, 0.8], shuffle=True, seed=SPLIT_SEED)

# Analyse the first protected attribute declared by the dataset.
sens_ind = 0
sens_attr = dataset_orig_panel19_train.protected_attribute_names[sens_ind]
print(sens_attr)

# One {attribute: value} dict per unprivileged value of that attribute.
unprivileged_groups = [
    {sens_attr: value} for value
    in dataset_orig_panel19_train.unprivileged_protected_attributes[sens_ind]
]
print(unprivileged_groups)

# Same construction for the privileged values.
privileged_groups = [
    {sens_attr: value} for value
    in dataset_orig_panel19_train.privileged_protected_attributes[sens_ind]
]
print(privileged_groups)

In [None]:
# Dataset-level metric object for the training split, built from the
# unprivileged / privileged group definitions created earlier.
metric_orig_panel19_train = BinaryLabelDatasetMetric(
    dataset_orig_panel19_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Wrap the metric in a text explainer and print what it reports for
# disparate impact on the training split.
explainer_orig_panel19_train = MetricTextExplainer(metric_orig_panel19_train)
disparate_impact_text = explainer_orig_panel19_train.disparate_impact()
print(disparate_impact_text)

# Learning an MLP Classifier on Original Data

In [None]:
# Load our model
from cdl_python.core.models import MLP

# To use pytorch with sklearn lets use skorch
import torch
from skorch import NeuralNetBinaryClassifier

# Seed torch's RNG *before* the network is constructed so that the weight
# initialisation (and any other torch-side randomness during fitting) is the
# same on every Restart & Run All.
TORCH_SEED = 42
torch.manual_seed(TORCH_SEED)

dataset = dataset_orig_panel19_train

# Single-output MLP with one input per feature column.
mlp_model = MLP(num_features=dataset.features.shape[1], num_classes=1)
# Cast the parameters to double precision.
# NOTE(review): presumably to match the float64 arrays coming out of
# StandardScaler - confirm.
mlp_model = mlp_model.to(dtype=torch.double)
torch_model = NeuralNetBinaryClassifier(
    mlp_model,
    criterion=torch.nn.BCEWithLogitsLoss,
    optimizer=torch.optim.Adam,
    lr=0.0001,
    max_epochs=10,
    batch_size=16,
)

# MLP from our cdl survey, fed with standardised features
model = make_pipeline(
    StandardScaler(),
    torch_model
)

# Labels are flattened to 1-D before being passed to the pipeline.
mlp_orig_panel19 = model.fit(dataset.features, dataset.labels.ravel())

In [None]:
from collections import defaultdict

def test(dataset, model, thresh_arr):
    """
    Score ``dataset`` with ``model`` and collect accuracy / fairness measures
    at every threshold in ``thresh_arr`` (constrained deep learning survey).

    Parameters
    ----------
    dataset
        Object exposing ``features``, ``favorable_label`` and ``copy()``;
        the copy receives the thresholded predictions as its ``labels``.
    model
        Either a classifier with ``predict_proba`` and ``classes_``, or an
        object whose ``predict(dataset)`` result has a ``scores`` attribute.
    thresh_arr
        Iterable of thresholds applied to the score of the favorable class.

    Returns
    -------
    collections.defaultdict
        Lists keyed by ``'bal_acc'``, ``'avg_odds_diff'``, ``'disp_imp'``,
        ``'stat_par_diff'``, ``'eq_opp_diff'`` and ``'theil_ind'``, each with
        one entry per threshold, in threshold order.

    Notes
    -----
    Reads the notebook-level ``unprivileged_groups`` and
    ``privileged_groups`` when building each ``ClassificationMetric``.
    """
    try:
        # sklearn-style classifier: pick the column of the favorable class
        scores = model.predict_proba(dataset.features)
        class_labels = np.array(model.classes_)
        favorable_col = np.where(class_labels == dataset.favorable_label)[0][0]
    except AttributeError:
        # aif360 in-processing algorithm: scores come from predict()
        scores = model.predict(dataset).scores
        favorable_col = 0

    results = defaultdict(list)
    for cutoff in thresh_arr:
        # Threshold the scores into binary predictions
        hard_labels = (scores[:, favorable_col] > cutoff).astype(np.float64)

        # Copy of the dataset that carries the predictions as labels
        predicted = dataset.copy()
        predicted.labels = hard_labels

        # Compare ground truth against the predictions
        cm = ClassificationMetric(
            dataset,
            predicted,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups,
        )

        # Balanced accuracy: mean of true positive and true negative rate
        tpr = cm.true_positive_rate()
        tnr = cm.true_negative_rate()
        results['bal_acc'].append((tpr + tnr) / 2)

        # Remaining measures are taken straight from the metric object
        for key, compute in (
            ('avg_odds_diff', cm.average_odds_difference),
            ('disp_imp', cm.disparate_impact),
            ('stat_par_diff', cm.statistical_parity_difference),
            ('eq_opp_diff', cm.equal_opportunity_difference),
            ('theil_ind', cm.theil_index),
        ):
            results[key].append(compute())

    return results

In [None]:
# Sweep 50 evenly spaced classification thresholds between 0.01 and 0.5 on
# the validation split.
thresh_arr = np.linspace(0.01, 0.5, 50)
val_metrics = test(
    dataset=dataset_orig_panel19_val,
    # The fitted pipeline in this notebook is `mlp_orig_panel19`; the
    # previous `lr_orig_panel19` is never defined and raised a NameError.
    model=mlp_orig_panel19,
    thresh_arr=thresh_arr
)
# Index of the threshold with the highest balanced accuracy on validation.
mlp_orig_best_ind = np.argmax(val_metrics['bal_acc'])

In [None]:
def plot(x, x_name, y_left, y_left_name, y_right, y_right_name):
    """
    Draw two curves over a shared x axis, one per y axis.

    ``y_left`` is plotted on the left axis (fixed range 0.5 to 0.8) and
    ``y_right`` in red on a twin right axis. The right range is 0 to 0.7
    when ``y_right_name`` contains ``'DI'`` and -0.25 to 0.1 otherwise.
    A dotted vertical line marks the x value at which ``y_left`` is largest.
    Nothing is returned.
    """
    label_style = {'fontsize': 16, 'fontweight': 'bold'}
    tick_style = {'labelsize': 14}

    # Left axis
    _fig, left_ax = plt.subplots(figsize=(7, 4))
    left_ax.plot(x, y_left)
    left_ax.set_xlabel(x_name, **label_style)
    left_ax.set_ylabel(y_left_name, color='b', **label_style)
    for axis in (left_ax.xaxis, left_ax.yaxis):
        axis.set_tick_params(**tick_style)
    left_ax.set_ylim(0.5, 0.8)

    # Right axis, sharing x with the left one
    right_ax = left_ax.twinx()
    right_ax.plot(x, y_right, color='r')
    right_ax.set_ylabel(y_right_name, color='r', **label_style)
    right_limits = (0., 0.7) if 'DI' in y_right_name else (-0.25, 0.1)
    right_ax.set_ylim(*right_limits)

    # Mark the x position of the maximum of the left curve
    peak_x = np.array(x)[np.argmax(y_left)]
    right_ax.axvline(peak_x, color='k', linestyle=':')
    right_ax.yaxis.set_tick_params(**tick_style)
    right_ax.grid(True)

In [None]:
# Convert disparate impact (DI) into the error 1 - min(DI, 1 / DI), which is
# 0 when DI equals 1, and plot it against balanced accuracy per threshold.
disp_imp = np.array(val_metrics['disp_imp'])
disp_imp_err = 1 - np.minimum(disp_imp, 1 / disp_imp)
plot(
    x=thresh_arr,
    x_name='Classification Thresholds',
    y_left=val_metrics['bal_acc'],
    y_left_name='Balanced Accuracy',
    y_right=disp_imp_err,
    y_right_name='1 - min(DI, 1 / DI)',
)

In [None]:
# Balanced accuracy (left axis) against average odds difference (right axis)
# over the same validation threshold sweep.
plot(
    x=thresh_arr,
    x_name='Classification Thresholds',
    y_left=val_metrics['bal_acc'],
    y_left_name='Balanced Accuracy',
    y_right=val_metrics['avg_odds_diff'],
    y_right_name='avg. odds diff.',
)

In [None]:
def describe_metrics(metrics, thresh_arr):
    """
    Print the threshold with the best balanced accuracy together with the
    fairness metrics recorded at that threshold.

    Parameters
    ----------
    metrics
        Mapping with the keys ``'bal_acc'``, ``'disp_imp'``,
        ``'avg_odds_diff'``, ``'stat_par_diff'``, ``'eq_opp_diff'`` and
        ``'theil_ind'``; each value is a sequence with one entry per
        threshold.
    thresh_arr
        Sequence of thresholds, indexed the same way as the metric sequences.

    Returns
    -------
    None
        The values are written to standard output, formatted to four
        decimal places.
    """
    # Get the index of the best balanced accuracy
    best_ind = np.argmax(metrics['bal_acc'])

    # Print the metric outputs
    print(
        "Threshold corresponding to Best balanced accuracy: {:6.4f}".format(thresh_arr[best_ind])
    )
    print(
        "Best balanced accuracy: {:6.4f}".format(metrics['bal_acc'][best_ind])
    )

    # Disparate-impact error 1 - min(DI, 1 / DI).
    # A DI of exactly 0 is handled explicitly: `1 / 0.0` raises
    # ZeroDivisionError for plain Python floats, while the mathematically
    # equivalent result (min(0, inf) == 0, so the error is 1) is what the
    # vectorised numpy version of this formula yields.
    disp_imp = metrics['disp_imp'][best_ind]
    if disp_imp == 0:
        disp_imp_at_best_ind = 1.0
    else:
        disp_imp_at_best_ind = 1 - min(disp_imp, 1 / disp_imp)
    print(
        "Corresponding 1-min(DI, 1/DI) value: {:6.4f}".format(disp_imp_at_best_ind)
    )
    print(
        "Corresponding average odds difference value: {:6.4f}".format(metrics['avg_odds_diff'][best_ind])
    )
    print(
        "Corresponding statistical parity difference value: {:6.4f}".format(metrics['stat_par_diff'][best_ind])
    )
    print(
        "Corresponding equal opportunity difference value: {:6.4f}".format(metrics['eq_opp_diff'][best_ind])
    )
    print(
        "Corresponding Theil index value: {:6.4f}".format(metrics['theil_ind'][best_ind])
    )

In [None]:
# Report the validation sweep at its best-balanced-accuracy threshold.
describe_metrics(metrics=val_metrics, thresh_arr=thresh_arr)

### Testing MLP model on original data

In [None]:
# Evaluate on the test split at the single threshold chosen on the
# validation split. The index is `mlp_orig_best_ind`; the previous
# `lr_orig_best_ind` is never defined and raised a NameError.
mlp_orig_metrics = test(
    dataset=dataset_orig_panel19_test,
    model=mlp_orig_panel19,
    thresh_arr=[thresh_arr[mlp_orig_best_ind]]
)
describe_metrics(mlp_orig_metrics, [thresh_arr[mlp_orig_best_ind]])

# Bias mitigation using in-processing technique - Prejudice Remover (PR)

In [None]:
# Standardise the features on a copy of the training split; the fitted
# scaler is kept in `pr_orig_scaler`.
pr_orig_scaler = StandardScaler()
dataset = dataset_orig_panel19_train.copy()
dataset.features = pr_orig_scaler.fit_transform(dataset.features)

# Initialize the prejudice remover for the sensitive attribute (eta=25.0)
# and fit it on the scaled copy.
model = PrejudiceRemover(sensitive_attr=sens_attr, eta=25.0)
pr_orig_panel19 = model.fit(dataset)