In [1]:
import os
import torch
import random
import sklearn
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

In [2]:
'''
Initialize the parameters
'''
DATASET = 'income' # {"income", "crim-recid", "health"}
MODEL = 'main'
DATA_LOC = './../data/'

In [3]:
class IncomeDataset(Dataset):
    """Income dataset."""

    def __init__(self, file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.all_data = np.load(self.root_dir+file)
        self.x = self.all_data['x']
        self.y = self.all_data['y']
        self.a = self.all_data['a']
        self.transform = transform
        
        # Complete all the dataset specific processing here
        print('Income dataset (x) dims: {}'.format(self.x.shape))
        print('Income dataset (y) dims: {}'.format(self.y.shape))
        print('Income dataset (a) dims: {}'.format(self.a.shape))

    def __len__(self):
        return len(self.all_data)

    def __getitem__(self, idx):
        #if torch.is_tensor(idx):
        #    idx = idx.tolist()
        
        sample_x, sample_y, sample_a = np.array(self.x[idx]), np.array(self.y[idx]), np.array(self.a[idx])
        sample_x, sample_y, sample_a = torch.tensor(sample_x, dtype=torch.float32), torch.tensor(sample_y, dtype=torch.long), torch.tensor(sample_a, dtype=torch.float32)

        #print('sample_x.shape: {}'.format(sample_x.shape))
        #print('sample_y.shape: {}'.format(sample_y.shape))
        #print('sample_a.shape: {}'.format(sample_a.shape))
        
        return sample_x, sample_y, sample_a

In [4]:
'''
Define the model
'''
class NNet(torch.nn.Module):
    def __init__(self):
        """
        Explicit layer definition
        """
        super(NNet, self).__init__()
        self.fc1 = nn.Linear(113, 50)
        self.fc2 = nn.Linear(50, 25)
        self.fc3 = nn.Linear(25, 2)

    def forward(self, x):
        """
        Explicit model definition
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

# Construct our model by instantiating the class defined above
model = NNet()

# Construct our loss function and an Optimizer. Training this strange model with
# vanilla stochastic gradient descent is tough, so we use momentum
criterion = torch.nn.CrossEntropyLoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

In [5]:
'''
Load the datasets
'''
if (DATASET=='income'):
    dataset = IncomeDataset(file='adult_train.npz',
                                    root_dir=DATA_LOC)
    dataloader = DataLoader(dataset, batch_size=10,
                        shuffle=True, num_workers=0)    
elif (DATASET=='health'):
    dataset = HealthDataset(file='processed.switzerland.data',
                                    root_dir=DATA_LOC)
    dataloader = DataLoader(dataset, batch_size=5,
                        shuffle=True, num_workers=10)

Income dataset (x) dims: (32561, 113)
Income dataset (y) dims: (32561, 1)
Income dataset (a) dims: (32561, 1)


In [6]:
model

NNet(
  (fc1): Linear(in_features=113, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=25, bias=True)
  (fc3): Linear(in_features=25, out_features=2, bias=True)
)

In [10]:
'''
Train the model
'''

epochs = 5000
total = 0
correct = 0
    
for t in range(epochs):
    for i_batch, sample_batched in enumerate(dataloader):
        x, y, a = sample_batched[0], sample_batched[1].squeeze(), sample_batched[2]
        
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x)
        
        #print('y_pred.dims: {}'.format(y_pred.shape))
        #print('y.dims: {}'.format(y.shape))

        # Compute and print loss
        loss = criterion(y_pred, y)
            
        _, predicted = torch.max(y_pred.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()

        if t % 100 == 50:
            print(t, loss.item())
            print('Accuracy of the network: %d %%' % (
                100 * correct / total))

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

50 1.211029052734375
Accuracy of the network: 66 %
150 1.1469459533691406
Accuracy of the network: 77 %
250 1.0605443716049194
Accuracy of the network: 86 %
350 0.9465766549110413
Accuracy of the network: 90 %
450 0.8029006123542786
Accuracy of the network: 92 %
550 0.6409387588500977
Accuracy of the network: 93 %
650 0.4836125075817108
Accuracy of the network: 94 %
750 0.34897053241729736
Accuracy of the network: 95 %
850 0.2521102726459503
Accuracy of the network: 95 %
950 0.18402588367462158
Accuracy of the network: 96 %
1050 0.13868367671966553
Accuracy of the network: 96 %
1150 0.10748478025197983
Accuracy of the network: 96 %
1250 0.08567927032709122
Accuracy of the network: 97 %
1350 0.06997472047805786
Accuracy of the network: 97 %
1450 0.058334045112133026
Accuracy of the network: 97 %
1550 0.04954163730144501
Accuracy of the network: 97 %
1650 0.042689867317676544
Accuracy of the network: 97 %
1750 0.037263281643390656
Accuracy of the network: 98 %
1850 0.03291705623269081
Ac

In [8]:
'''
Train the model (Regularize with different fairness metrics)
'''


'\nTrain the model (Regularize with different fairness metrics)\n'