In [None]:
# Show the kernel's current working directory before it is changed in the next cell.
%pwd

In [None]:
# Move the kernel's working directory one level up from where it was when this
# cell first ran.
#
# A bare `%cd ..` climbs one more directory every time the cell is re-executed,
# which silently breaks every relative path and local import that follows.
# Remembering the starting directory on the first run and always targeting its
# parent makes the cell safe to run any number of times.
import os

if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

os.chdir(os.path.dirname(NOTEBOOK_START_DIR))

# Display the resulting working directory as the cell output.
os.getcwd()

In [None]:
# Load all necessary packages
import os
from pathlib import Path

import numpy as np
from IPython.display import Markdown, display
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MaxAbsScaler

from aif360.algorithms.inprocessing.exponentiated_gradient_reduction import ExponentiatedGradientReduction
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric

In [None]:
# Load Wenjie's dataset
#
# The archive location is resolved from the first candidate that exists:
#   1. the ADULT_NPZ_PATH environment variable, when it is set;
#   2. `data/adult/adult.npz` relative to the current working directory
#      (NOTE(review): assumes the working directory is the repository root
#      after the directory change above -- confirm);
#   3. the original absolute path, kept so the notebook keeps working on the
#      machine it was written on.
ADULT_NPZ_LEGACY_PATH = Path('/Users/ryandevera/data-science/umn_environments/Constrained-Deep-Learning-Survey/data/adult/adult.npz')

adult_npz_candidates = [Path("data/adult/adult.npz"), ADULT_NPZ_LEGACY_PATH]
if os.environ.get("ADULT_NPZ_PATH"):
    adult_npz_candidates.insert(0, Path(os.environ["ADULT_NPZ_PATH"]))

ADULT_NPZ_PATH = next(
    (candidate for candidate in adult_npz_candidates if candidate.is_file()),
    None,
)
if ADULT_NPZ_PATH is None:
    # Fail with every location that was tried instead of an opaque error on a
    # path that only exists on one developer's machine.
    raise FileNotFoundError(
        "adult.npz not found; set ADULT_NPZ_PATH or place the file at one of: "
        + ", ".join(str(candidate) for candidate in adult_npz_candidates)
    )

adult_data = np.load(ADULT_NPZ_PATH)
adult_data

In [None]:
# Names of the arrays stored in the loaded archive (iterating it yields its keys).
list(adult_data)

In [None]:
# `files` is the archive's own listing of the array names it contains.
adult_data.files

In [None]:
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult

In [None]:
# Group definitions shared by every fairness metric in this notebook:
# records with sex == 1 form the privileged group, sex == 0 the unprivileged one.
unprivileged_groups = [{'sex': 0}]
privileged_groups = [{'sex': 1}]

# Fetch the pre-processed Adult dataset through the aif360 helper.
dataset_orig = load_preproc_data_adult()

# Seed NumPy immediately before the shuffled split so the same train/test
# partition is produced on every run; the single cut point 0.7 puts the first
# 70% of the shuffled rows in the training set and the rest in the test set.
np.random.seed(0)
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)

In [None]:
# Inspect which dataset class the aif360 loader returned.
type(dataset_orig)

In [None]:
# Label-bias metrics for the untouched train and test splits.
# Both metric objects are created first and reported afterwards.
# NOTE(review): assumes constructing a metric emits no output, so the visible
# order (heading, train line, test line) is unchanged.
metric_orig_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
metric_orig_test = BinaryLabelDatasetMetric(
    dataset_orig_test,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

display(Markdown("#### Original training dataset"))
print(
    "Train set: Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_orig_train.mean_difference()
)
print(
    "Test set: Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_orig_test.mean_difference()
)

In [None]:
# Despite its name, `min_max_scaler` is a MaxAbsScaler. It is fitted on the
# training features only and then applied to both splits; the scaled matrices
# overwrite the `features` attribute of the dataset objects in place.
min_max_scaler = MaxAbsScaler().fit(dataset_orig_train.features)
dataset_orig_train.features = min_max_scaler.transform(dataset_orig_train.features)
dataset_orig_test.features = min_max_scaler.transform(dataset_orig_test.features)

# Recompute the label statistics on the scaled splits; only features were
# changed, so these should match the values reported for the original data.
metric_scaled_train = BinaryLabelDatasetMetric(
    dataset_orig_train,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)
metric_scaled_test = BinaryLabelDatasetMetric(
    dataset_orig_test,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

display(Markdown("#### Scaled dataset - Verify that the scaling does not affect the group label statistics"))
print(
    "Train set: Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_scaled_train.mean_difference()
)
print(
    "Test set: Difference in mean outcomes between unprivileged and privileged groups = %f"
    % metric_scaled_test.mean_difference()
)

In [None]:
# Load our model
from cdl_python.core.models import MLP

# To use pytorch with sklearn lets use skorch
import torch
from skorch import NeuralNetBinaryClassifier


# Baseline (unconstrained) classifier: the project's MLP wrapped in skorch so it
# exposes the scikit-learn fit/predict interface.
X_train = dataset_orig_train.features
y_train = dataset_orig_train.labels.ravel()

# Seed PyTorch before the network is built. Without this the initial weights
# differ on every run, so the accuracy reported below is not reproducible even
# though NumPy was seeded for the data split.
torch.manual_seed(0)

# One output unit (a single logit); the module is cast to float64 so it can
# consume the feature matrix without a dtype conversion.
# NOTE(review): assumes `features` is a float64 array -- confirm.
mlp_model = MLP(num_features=X_train.shape[1], num_classes=1)
mlp_model = mlp_model.to(dtype=torch.double)
model = NeuralNetBinaryClassifier(
    mlp_model,
    train_split=None,  # train on all of X_train; no internal validation split
    criterion=torch.nn.BCEWithLogitsLoss,
    optimizer=torch.optim.Adam,
    lr=0.0001,
    max_epochs=25,
    batch_size=16,
)

model.fit(X_train, y_train)

In [None]:
# model.history[0]['epoch']

In [None]:
# Score the baseline network on the held-out split.
X_test = dataset_orig_test.features
y_test = np.ravel(dataset_orig_test.labels)

y_pred = model.predict(X_test)
# The variable keeps its historical `lr_` prefix although the model is the MLP.
lr_acc = accuracy_score(y_test, y_pred)

display(Markdown("#### Accuracy"))
print(lr_acc)

# Exponentiated Gradient Reduction

In [None]:
# from aif360.sklearn.inprocessing import ExponentiatedGradientReduction

In [None]:
# Estimator
# A fresh, untrained network for the fairness-constrained run. PyTorch is seeded
# first so the initial weights -- and therefore the metrics reported for the
# reduction below -- are the same on every run.
torch.manual_seed(0)
mlp_model = MLP(num_features=X_train.shape[1], num_classes=1)
mlp_model = mlp_model.to(dtype=torch.double)

class FairClassifier(NeuralNetBinaryClassifier):
    """Binary skorch classifier whose loss always sees float64 targets.

    The only customisation is `get_loss`, which converts the targets to
    `torch.double` on the net's device before handing them to the criterion.
    """

    def get_loss(self, y_pred, y_true, X=None, training=False):
        """Return the criterion value for one batch.

        Parameters
        ----------
        y_pred : model output for the batch, passed to the criterion unchanged.
        y_true : batch targets; converted to a float64 tensor on `self.device`.
        X, training : accepted for interface compatibility and not used here.

        NOTE(review): the loss is computed without per-sample weights. If the
        fairness reduction supplies sample weights when fitting, they have no
        effect on this loss -- confirm that is intended.
        """
        # `torch.as_tensor` converts dtype/device without copy-constructing from
        # an existing tensor; `torch.tensor(tensor)` warns and always copies.
        y_true = torch.as_tensor(y_true, dtype=torch.double, device=self.device)
        return self.criterion_(y_pred, y_true)

# Positive weight
# Class counts taken from the training labels.
# NOTE(review): assumes the labels are encoded as 0/1 -- confirm.
num_positives = np.sum(y_train)
num_negatives = np.sum(1 - y_train)

# Calculate pos_weight
# Ratio of negatives to positives, used to up-weight the positive class in the loss.
pos_weight = torch.tensor(num_negatives / num_positives)

# The estimator handed to the fairness reduction: one epoch per fit call, with
# the entire training set processed as a single batch.
estimator = FairClassifier(
    module=mlp_model,
    criterion=torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight),
    optimizer=torch.optim.Adam,
    lr=0.0001,
    max_epochs=1,
    batch_size=X_train.shape[0],
    train_split=None,
)

# from sklearn.linear_model import LogisticRegression
# estimator = LogisticRegression(solver='lbfgs', max_iter=1000)

In [None]:
# Reset NumPy's global seed so this cell gives repeatable results.
np.random.seed(0)

# Wrap the estimator in the exponentiated-gradient reduction with an
# equalized-odds constraint; the protected attribute stays in the feature set.
exp_grad_red = ExponentiatedGradientReduction(
    estimator=estimator,
    drop_prot_attr=False,
    constraints="EqualizedOdds",
)

# Fit on the training split, then predict the held-out split.
exp_grad_red.fit(dataset_orig_train)
exp_grad_red_pred = exp_grad_red.predict(dataset_orig_test)

In [None]:
# Compare the true test labels with the reduction's predictions, broken down by
# the privileged / unprivileged groups defined earlier.
metric_test = ClassificationMetric(
    dataset_orig_test,
    exp_grad_red_pred,
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups,
)

# Compute both numbers first, then report them under their headings.
egr_acc = metric_test.accuracy()
egr_aod = metric_test.average_odds_difference()

display(Markdown("#### Accuracy"))
print(egr_acc)

display(Markdown("#### Average odds difference"))
print(egr_aod)