In [1]:
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [2]:
from load_data import *

In [3]:
X_tr, X_te, y_tr, y_te, xs_tr, xs_te = load_adult_data(svm=False,random_state=42)

In [4]:
class NPsDataSet(TensorDataset):

    def __init__(self, *dataarrays):
        tensors = (torch.tensor(da).float() for da in dataarrays)
        super(NPsDataSet, self).__init__(*tensors)

In [5]:
y_tr = y_tr.astype('float32')
y_te = y_te.astype('float32')
train_data = NPsDataSet(X_tr, y_tr, xs_tr)
test_data = NPsDataSet(X_te, y_te, xs_te)

In [6]:
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, drop_last=True)

In [7]:
print('# training samples:', len(train_data))
print('# batches:', len(train_loader))

# training samples: 34189
# batches: 1068


In [8]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [9]:
class Classifier(nn.Module):

    def __init__(self, n_features, n_hidden=32, p_dropout=0.2):
        super(Classifier, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(n_features, n_hidden),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Dropout(p_dropout),
            nn.Linear(n_hidden, 1),
        )

    def forward(self, x):
        return F.sigmoid(self.network(x))

In [10]:
n_features = train_data.tensors[0].shape[1]
clf = Classifier(n_features=n_features)
clf_criterion = nn.BCELoss()
clf_optimizer = optim.Adam(clf.parameters())

In [47]:
N_CLF_EPOCHS = 2

for epoch in range(N_CLF_EPOCHS):
    for x, y, _ in train_loader:
        clf.zero_grad()
        p_y = clf(x)
        loss = clf_criterion(p_y, y)
        loss.backward()
        clf_optimizer.step()

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


In [11]:
class Adversary(nn.Module):

    def __init__(self, n_sensitive, n_hidden=32):
        super(Adversary, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(1, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_sensitive),
        )

    def forward(self, x):
        return F.sigmoid(self.network(x))

In [12]:
lambdas = torch.Tensor([200, 30])
adv = Adversary(xs_tr.shape[1])
adv_criterion = nn.BCELoss(reduce=False)
adv_optimizer = optim.Adam(adv.parameters())



In [50]:
N_ADV_EPOCHS = 5

In [13]:
adv

Adversary(
  (network): Sequential(
    (0): Linear(in_features=1, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [51]:
for epoch in range(N_ADV_EPOCHS):
    for x, _, z in train_loader:
        adv.zero_grad()
        p_y = clf(x).detach()
        p_z = adv(p_y)
        loss = (adv_criterion(p_z, z) * lambdas).mean()
        loss.backward()
        adv_optimizer.step()

In [52]:
N_EPOCH_COMBINED = 165

for epoch in range(1, N_EPOCH_COMBINED):

    # Train adversary
    for x, y, z in train_loader:
        adv.zero_grad()
        p_y = clf(x)
        p_z = adv(p_y)
        loss_adv = (adv_criterion(p_z, z) * lambdas).mean()
        loss_adv.backward()
        adv_optimizer.step()

    # Train classifier on single batch
    for x, y, z in train_loader:
        pass  # Ugly way to get a single batch
    clf.zero_grad()
    p_y = clf(x)
    p_z = adv(p_y)
    loss_adv = (adv_criterion(p_z, z) * lambdas).mean()
    clf_loss = clf_criterion(p_y, y) - (adv_criterion(adv(p_y), z) * lambdas).mean()
    clf_loss.backward()
    clf_optimizer.step()

# testing Fair NN model

In [55]:
from fair_eval import calculate_prule_clf, calculate_odds_clf, calculate_parity_reg, calculate_group_loss,l2_loss, calculate_overall_accuracy

ImportError: cannot import name 'bce_loss' from 'fair_eval' (/home/srp/Fairness/fairness_audit/codes/fair_eval.py)

In [56]:
pred = (clf(torch.tensor(X_te).float())>0.5).float()
pred = pred.cpu().detach().numpy().flatten()

In [57]:
pred_ = clf(torch.tensor(X_te).float())
pred_ = pred_.cpu().detach().numpy().flatten()

In [58]:
calculate_prule_clf(pred,y_te,xs_te)

disparate impact:  97.62592268977488
disparate misclassification rate:  83.1986660114709
disparate false positive rate: nan
disparate false negative rate: nan


  prule = min((fr0/s0)/(fr1/s1),(fr1/s1)/(fr0/s0))*100
  prule = min((fr0/s0)/(fr1/s1),(fr1/s1)/(fr0/s0))*100


In [59]:
calculate_odds_clf(pred,y_te,xs_te)

equalized opportunity for 0.0 : 67.96607568463152
equalized opportunity for 1.0 : 68.42355371900827


In [63]:
calculate_group_loss(bce_loss,pred_,y_te,xs_te)

loss function:  bce_loss
loss value for group 0: 0.34681829810142517
loss value for group 1: 0.5016478896141052


In [61]:
calculate_overall_accuracy(pred,y_te)

0.7950910297044299

In [64]:
import numpy as np