# Fairness and Privacy
This demo demonstrates how to mitigate unfairness through resampling. 

### Step 1: Import Libraries

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, WeightedRandomSampler
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
# seed everything
torch.manual_seed(0)
np.random.seed(0)
if torch.cuda.is_available():
    torch.cuda.manual_seed(0)
    torch.cuda.manual_seed_all(0)

In [None]:
# # If you use Colab
# # TODO
# from google.colab import drive
# drive.mount('/content/gdrive')

# # Path to the directory containing the data
# import os
# path = './gdrive/MyDrive/Code/ic/TAIMI_trustworthy' 
# os.chdir(path)

### Step 2: Prepare the Skin Lesion Dataset
Here we use a small split of the ISIC 2019 dataset

In [3]:
# Visualize the data distribution
df = pd.read_csv('Data/ISIC2019/label.csv')
df

Unnamed: 0,image,label,label_code,sex,sex_code
0,ISIC_0034321,NV,0,female,0
1,ISIC_0034322,NV,0,male,1
2,ISIC_0034324,NV,0,male,1
3,ISIC_0034325,NV,0,female,0
4,ISIC_0034328,NV,0,male,1
...,...,...,...,...,...
495,ISIC_0054574,MEL,1,female,0
496,ISIC_0054577,MEL,1,male,1
497,ISIC_0054612,MEL,1,male,1
498,ISIC_0054637,MEL,1,female,0


In [82]:
# Label distribution
df['label'].value_counts()

NV     250
MEL    250
Name: label, dtype: int64

In [83]:
# Sensitive attribute (sex) distribution
df['sex'].value_counts()

male      268
female    232
Name: sex, dtype: int64

In [84]:
root_dir = '/home/siyi/project/course/TAIMI_trustworthy/Data/ISIC2019/images'

class ISICDataset(Dataset):
    def __init__(self, df, root_dir, transform=None, do_train=False):
        self.df = df
        self.root_dir = root_dir
        self.do_train = do_train
        if self.do_train:
            self.transform = transform
        else:
            self.transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])
        print(f'Number of samples: {len(self.df)}')

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = row['image']
        img_path = self.root_dir + '/' + img_name + '.jpg'
        image = Image.open(img_path)
        label = row['label_code']
        sensitive_attr = row['sex_code']

        if self.transform:
            image = self.transform(image)

        return image, label, sensitive_attr, img_name

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(),
    transforms.RandomHorizontalFlip(),
    transforms.CenterCrop(size=224), 
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create train and test datasets
train_df, test_df = train_test_split(df, test_size=0.2, random_state=2025)
train_dataset = ISICDataset(train_df, root_dir, transform=train_transform, do_train=True)
test_dataset = ISICDataset(test_df, root_dir)

# Create train and test dataloaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

Number of samples: 400
Number of samples: 100


### Step 3: Define the Model

In [85]:
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(512, len(df['label'].unique()))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

### Step 4: Train the Model
Define the loss function and optimizer, and train the model on the dataset.

In [86]:
# Training
def training(model, criterion, optimizer, train_loader, num_epochs=20):
    for epoch in range(num_epochs):
        model.train()
        for images, labels, _, _ in train_loader:
            images, labels = images.to(device), labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    return model


def testing(model, test_loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        prediction_list = []
        label_list = []
        s_list = []
        prob_list = []  # Save True class probability for AUC calculation
        for images, labels, s, _ in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            probs = F.softmax(outputs.detach(), dim=1)
            probs = probs[:, 1]  # Probability of True class

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Save predictions, labels, and sensitive attributes
            prediction_list.extend(predicted.cpu().numpy())
            label_list.extend(labels.cpu().numpy())
            s_list.extend(s.cpu().numpy())
            prob_list.extend(probs.cpu().numpy())
        
        print(f'Test Accuracy: {100 * correct / total:.2f}%')
    return prediction_list, prob_list, label_list, s_list

In [87]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
# Train the model
model = training(model, criterion, optimizer, train_loader, num_epochs=10)

Epoch [1/10], Loss: 0.2950
Epoch [2/10], Loss: 0.4005
Epoch [3/10], Loss: 0.0306
Epoch [4/10], Loss: 0.5105
Epoch [5/10], Loss: 0.1221
Epoch [6/10], Loss: 0.0373
Epoch [7/10], Loss: 0.2420
Epoch [8/10], Loss: 0.0261
Epoch [9/10], Loss: 0.1458
Epoch [10/10], Loss: 0.1415


In [88]:
# Test and save the model
predictions, probs, labels, sensitive_attrs = testing(model, test_loader)
torch.save(model.state_dict(), 'Checkpoint/ISIC2019_model.pth')

Test Accuracy: 89.00%


### Step 4: Calculate Evaluation Metrics
#### Classification Performance
* Accuracy
* Area under the curve (AUC)


#### Fairness Metrics
* Demographic Parity: The predicted positive rate should be equal across sensitive attributes ($s \in S$).
$$
DP = \operatorname{abs}[p(\hat{y}=1|s=0)-p(\hat{y}=1|s=1)]
$$

* Accuracy Parity: The accuracy should be equal across sensitive attributes.
$$
AP = \operatorname{abs}[p(\hat{y}=y|s=0)-p(\hat{y}=y|s=1)]
$$

* Equalized Odds: The true positive rates (TPRs) and false positive rates (FPRs) should be equalized across sensitive attributes.
$$
EOD = \operatorname{abs}[p(\hat{y}=1|\hat{y}=y, s=0)-p(\hat{y}=1|\hat{y}=y, s=1)]
$$

* Equal Opportunity: The the true positive rates should be equalized across sensitive attributes.
$$
EO = \operatorname{abs}[p(\hat{y}=1|\hat{y}=1, s=j)-p(\hat{y}=1|\hat{y}=1, s=j)]
$$



In [89]:
# Classification performance
auc = roc_auc_score(labels, probs)
accuracy = np.mean(np.array(predictions) == np.array(labels))
print(f'Overall Accuracy: {accuracy:.2f}')
print(f'AUC: {auc:.4f}')

Overall Accuracy: 0.89
AUC: 0.9684


In [90]:
# Fairness performance
g0_idx = np.where(np.array(sensitive_attrs) == 0)[0]
g1_idx = np.where(np.array(sensitive_attrs) == 1)[0]

g0_pred = np.array(predictions)[g0_idx]
g1_pred = np.array(predictions)[g1_idx]

g0_labels = np.array(labels)[g0_idx]
g1_labels = np.array(labels)[g1_idx]

# Demographic Parity
def demographic_parity(g0_pred, g1_pred):
    dp = np.abs(np.mean(g0_pred==1) - np.mean(g1_pred==1))
    return dp

# Accuracy Parity
def accuracy_parity(g0_pred, g1_pred, g0_labels, g1_labels):
    g0_acc = np.mean(g0_pred==g0_labels)
    g1_acc = np.mean(g1_pred==g1_labels)
    ap = np.abs(g0_acc - g1_acc)
    return ap

# Equalized Odds
def equalized_odds(g0_pred, g1_pred, g0_labels, g1_labels):
    if np.sum(g0_pred==g0_labels) == 0:
        g0_rate = 0
    else:
        g0_rate = np.sum((g0_pred==1) & (g0_labels==1)) / np.sum(g0_pred==g0_labels)
    if np.sum(g1_pred==g1_labels) == 0:
        g1_rate = 0
    else:
        g1_rate = np.sum((g1_pred==1) & (g1_labels==1)) / np.sum(g1_pred==g1_labels)
    eod = np.abs(g0_rate - g1_rate)
    return eod

# Equal Opportunity
def equal_opportunity(g0_pred, g1_pred, g0_labels, g1_labels):
    g0_tpr = np.sum((g0_pred==1) & (g0_labels==1)) / np.sum(g0_labels==1)
    g1_tpr = np.sum((g1_pred==1) & (g1_labels==1)) / np.sum(g1_labels==1)
    eo = np.abs(g0_tpr - g1_tpr)
    return eo


dp = demographic_parity(g0_pred, g1_pred)
ap = accuracy_parity(g0_pred, g1_pred, g0_labels, g1_labels)
eod = equalized_odds(g0_pred, g1_pred, g0_labels, g1_labels)
eo = equal_opportunity(g0_pred, g1_pred, g0_labels, g1_labels)

print(f'Demographic Parity: {dp:.4f}')
print(f'Accuracy Parity: {ap:.4f}')
print(f'Equalized Odds: {eod:.4f}')
print(f'Equal Opportunity: {eo:.4f}')

Demographic Parity: 0.1597
Accuracy Parity: 0.1044
Equalized Odds: 0.1235
Equal Opportunity: 0.2174


### Step 5: Mitigate Unfairness Using Demographic Parity Loss
Add a demographic parity loss regularization to the original loss.

In [71]:
def demographic_parity_loss(y_pred, sensitive_attr):
    """
    Calculate the demographic parity loss as the difference between
    the mean predicted positive rates for different sensitive groups.
    """
    # Group 0 and Group 1 masks
    group_0_mask = (sensitive_attr == 0)
    group_1_mask = (sensitive_attr == 1)
    
    group_0_rate = y_pred[group_0_mask].float().mean()
    group_1_rate = y_pred[group_1_mask].float().mean()
    
    dp_loss = torch.abs(group_0_rate - group_1_rate)
    return dp_loss

# Training with regularization
def training_with_dp_regularization(model, criterion, optimizer, train_loader, num_epochs=10, dp_weight=0.1):
    for epoch in range(num_epochs):
        model.train()
        for images, labels, sensitive_attr, _ in train_loader:
            images, labels, sensitive_attr = images.to(device), labels.to(device), sensitive_attr.to(device)
            
            # Forward pass
            outputs = model(images)
            loss_ce = criterion(outputs, labels)

            # Demographic Parity loss   
            with torch.no_grad():
                _, y_pred = torch.max(outputs.data, 1)
                dp_loss = demographic_parity_loss(y_pred, sensitive_attr)
            
            loss = loss_ce + dp_loss * dp_weight

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
    return model

In [72]:
# Define the model
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(512, len(df['label'].unique()))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Use the resampled dataloader to train the model
model = training_with_dp_regularization(model, criterion, optimizer, train_loader, num_epochs=10)

Epoch [1/10], Loss: 0.4441
Epoch [2/10], Loss: 0.1664
Epoch [3/10], Loss: 0.2355
Epoch [4/10], Loss: 0.0899
Epoch [5/10], Loss: 0.0579
Epoch [6/10], Loss: 0.4091
Epoch [7/10], Loss: 0.3374
Epoch [8/10], Loss: 0.0467
Epoch [9/10], Loss: 0.2305
Epoch [10/10], Loss: 0.1346


In [74]:
# Evaluate the model
predictions, probs, labels, sensitive_attrs = testing(model, test_loader)

# Classification performance
auc = roc_auc_score(labels, probs)
print(f'AUC: {auc:.4f}')

# Fairness performance
g0_idx = np.where(np.array(sensitive_attrs) == 0)[0]
g1_idx = np.where(np.array(sensitive_attrs) == 1)[0]
g0_pred = np.array(predictions)[g0_idx]
g1_pred = np.array(predictions)[g1_idx]
g0_labels = np.array(labels)[g0_idx]
g1_labels = np.array(labels)[g1_idx]

dp = demographic_parity(g0_pred, g1_pred)
ap = accuracy_parity(g0_pred, g1_pred, g0_labels, g1_labels)
eod = equalized_odds(g0_pred, g1_pred, g0_labels, g1_labels)
eo = equal_opportunity(g0_pred, g1_pred, g0_labels, g1_labels)

print(f'Demographic Parity: {dp:.4f}')
print(f'Accuracy Parity: {ap:.4f}')
print(f'Equalized Odds: {eod:.4f}')
print(f'Equal Opportunity: {eo:.4f}')

Test Accuracy: 85.00%
AUC: 0.9620
Demographic Parity: 0.0340
Accuracy Parity: 0.0260
Equalized Odds: 0.0527
Equal Opportunity: 0.0000
