<a href="https://colab.research.google.com/github/maverick-0215/Fairness-via-Continual-Learning/blob/main/Testing_Fairness_with_EWC_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

import torch.nn.functional as F
from torchvision import datasets, transforms

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import torch


# Load the UCI Adult census data; the raw file has no header row, and
# missing values are written as " ?" (note the leading space).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
           'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
           'hours-per-week', 'native-country', 'income']
data = pd.read_csv(url, header=None, names=columns, na_values=" ?")

# One-hot encode every object (string) column. The generated column names keep
# the leading space of the raw values, e.g. 'sex_ Male', 'income_ >50K'.
data = pd.get_dummies(data, columns=data.select_dtypes(include=['object']).columns)

y = data['income_ >50K'].values

# Drop 'income' related columns from X
X = data.drop(['income_ <=50K', 'income_ >50K'], axis=1)

gender_attr = data['sex_ Male'].values  # 1 for Male, 0 for Female
race_attr = data['race_ White'].values  # 1 for White; after the filter below, 0 means Black

# Filter data for White and Black individuals only.
# The mask is converted to a plain NumPy array so that it indexes NumPy
# arrays positionally instead of relying on pandas index alignment.
valid_race = (data['race_ White'] == 1) | (data['race_ Black'] == 1)
race_mask = valid_race.to_numpy()

X_raw = X.to_numpy(dtype=float)[race_mask]
y_tensor = torch.tensor(y[race_mask], dtype=torch.long)
gender_attr = gender_attr[race_mask]
race_attr = race_attr[race_mask]

# Split BEFORE scaling: the scaler must only see training rows, otherwise the
# test-set mean/variance leak into the features the model is trained on.
(X_train_raw, X_test_raw, y_train, y_test,
 gender_train, gender_test, race_train, race_test) = train_test_split(
    X_raw, y_tensor, gender_attr, race_attr, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = torch.tensor(scaler.fit_transform(X_train_raw), dtype=torch.float32)
X_test = torch.tensor(scaler.transform(X_test_raw), dtype=torch.float32)

# Full filtered feature matrix, scaled with the training-set statistics.
X_tensor = torch.tensor(scaler.transform(X_raw), dtype=torch.float32)


In [None]:
from torch.utils.data import Dataset, DataLoader

class AdultDataset(Dataset):
    """Index-aligned view over features, labels and two sensitive attributes.

    Each item is the tuple ``(X[i], y[i], gender_attrs[i], race_attrs[i])``.
    The dataset length is taken from ``X``.
    """

    def __init__(self, X, y, gender_attrs, race_attrs):
        self.X, self.y = X, y
        self.gender_attrs, self.race_attrs = gender_attrs, race_attrs

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        # Pull the same position out of every parallel container.
        fields = (self.X, self.y, self.gender_attrs, self.race_attrs)
        return tuple(field[index] for field in fields)


# Wrap the train/test splits (with their sensitive attributes) in datasets,
# then batch them: the training loader reshuffles, the test loader keeps order.
train_dataset, test_dataset = (
    AdultDataset(X_train, y_train, gender_train, race_train),
    AdultDataset(X_test, y_test, gender_test, race_test),
)

train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=64)


In [None]:

class NeuralNet(nn.Module):
    """Three-layer MLP: input_size -> 64 -> 32 -> 2 with ReLU between layers.

    The final layer emits two raw scores (logits), one per income class.
    """

    def __init__(self, input_size):
        super().__init__()
        widths = (input_size, 64, 32)

        # Hidden stack: each Linear is followed by a ReLU.
        layers = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            layers.extend([nn.Linear(fan_in, fan_out), nn.ReLU()])

        # Output is 2 because it's a binary classification problem
        layers.append(nn.Linear(32, 2))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        return self.model(x)
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Every batch must unpack into exactly four items; only the first two
    (inputs, labels) are used.

    Returns:
        (mean_batch_loss, accuracy_percent) where the loss is the average of
        the per-batch loss values and the accuracy is computed over all
        samples seen in this pass.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for batch in loader:
        features, targets, _, _ = batch
        features = features.to(device)
        targets = targets.to(device)

        # Standard step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        logits = model(features)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = torch.max(logits, 1)[1]  # index of the largest score per row
        n_correct += (predicted == targets).sum().item()
        n_seen += targets.size(0)

    accuracy = 100 * n_correct / n_seen
    return loss_sum / len(loader), accuracy


In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator > 0 else 0.0


def calculate_fairness_metrics(true_labels, preds, race_attrs, gender_attrs):
    """Compute group-fairness gaps across the four race x gender subgroups.

    Args:
        true_labels: Sequence of ground-truth binary labels (0/1).
        preds: Sequence of predicted binary labels (0/1).
        race_attrs: Sequence where 1 = White and 0 = Black.
        gender_attrs: Sequence where 1 = Male and 0 = Female.

    Returns:
        dict with three entries, each the max-minus-min over the non-empty
        subgroups of, respectively, the positive prediction rate
        ("Demographic Parity"), the true-positive rate and the
        false-positive rate ("Equalized Odds ..."). If no subgroup has any
        sample (e.g. empty input) all three gaps are reported as 0.0.

    Raises:
        ValueError: If the four inputs do not have the same length.
    """
    # Convert to numpy arrays
    true_labels = np.asarray(true_labels)
    preds = np.asarray(preds)
    race_attrs = np.asarray(race_attrs)
    gender_attrs = np.asarray(gender_attrs)

    lengths = {len(true_labels), len(preds), len(race_attrs), len(gender_attrs)}
    if len(lengths) != 1:
        raise ValueError("true_labels, preds, race_attrs and gender_attrs must have the same length")

    groups = {
        "White Male": (race_attrs == 1) & (gender_attrs == 1),
        "White Female": (race_attrs == 1) & (gender_attrs == 0),
        "Black Male": (race_attrs == 0) & (gender_attrs == 1),
        "Black Female": (race_attrs == 0) & (gender_attrs == 0),
    }

    pos_rates = {}
    tpr_rates = {}
    fpr_rates = {}

    for group_name, group_idx in groups.items():
        if not group_idx.any():
            continue  # Skip groups with no samples

        group_true = true_labels[group_idx]
        group_pred = preds[group_idx]

        # Confusion-matrix cells for labels {0, 1}, counted directly with
        # NumPy so the function does not depend on an import made elsewhere.
        tp = np.sum((group_true == 1) & (group_pred == 1))
        fn = np.sum((group_true == 1) & (group_pred == 0))
        fp = np.sum((group_true == 0) & (group_pred == 1))
        tn = np.sum((group_true == 0) & (group_pred == 0))

        pos_rates[group_name] = np.mean(group_pred)
        tpr_rates[group_name] = _safe_ratio(tp, tp + fn)
        fpr_rates[group_name] = _safe_ratio(fp, fp + tn)

    def _gap(rates):
        # max()/min() raise on an empty collection, so report "no gap" instead.
        values = list(rates.values())
        return max(values) - min(values) if values else 0.0

    # Compute fairness metrics
    return {
        "Demographic Parity": _gap(pos_rates),
        "Equalized Odds (TPR Difference)": _gap(tpr_rates),
        "Equalized Odds (FPR Difference)": _gap(fpr_rates),
    }


In [None]:
from sklearn.metrics import confusion_matrix

from sklearn.metrics import confusion_matrix

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating it.

    Batches may carry 4 items (inputs, labels, gender, race), 3 items
    (inputs, labels, one sensitive attribute, stored as gender) or 2 items
    (inputs, labels). Any other size raises ``ValueError``.

    Returns:
        (mean_batch_loss, accuracy_percent, fairness_metrics, group_accuracies).
        The last two are empty dicts unless both gender and race values were
        collected; otherwise ``group_accuracies`` maps each race x gender
        subgroup to its accuracy in percent (``None`` for an empty subgroup).
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    pred_buf, label_buf = [], []
    gender_buf, race_buf = [], []

    with torch.no_grad():
        for batch in loader:
            n_fields = len(batch)
            if n_fields == 4:
                inputs, labels, gender, race = batch
                gender_buf.extend(gender.cpu().numpy())
                race_buf.extend(race.cpu().numpy())
            elif n_fields == 3:
                inputs, labels, sensitive = batch
                gender_buf.extend(sensitive.cpu().numpy())  # Assuming gender in this case
            elif n_fields == 2:
                inputs, labels = batch
            else:
                raise ValueError(f"Unexpected batch format with {len(batch)} elements.")

            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()

            predicted = torch.max(logits, 1)[1]
            n_correct += (predicted == labels).sum().item()
            n_seen += labels.size(0)

            pred_buf.extend(predicted.cpu().numpy())
            label_buf.extend(labels.cpu().numpy())

    accuracy = 100 * n_correct / n_seen

    # Fairness needs both sensitive attributes; otherwise report nothing.
    if not (gender_buf and race_buf):
        return loss_sum / len(loader), accuracy, {}, {}

    fairness_metrics = calculate_fairness_metrics(label_buf, pred_buf, race_buf, gender_buf)

    # Per-subgroup accuracy
    preds_arr = np.array(pred_buf)
    labels_arr = np.array(label_buf)
    race_arr = np.array(race_buf)
    gender_arr = np.array(gender_buf)

    is_white, is_black = race_arr == 1, race_arr == 0
    is_male, is_female = gender_arr == 1, gender_arr == 0
    subgroup_masks = {
        "White Male": is_white & is_male,
        "White Female": is_white & is_female,
        "Black Male": is_black & is_male,
        "Black Female": is_black & is_female,
    }

    group_accuracies = {}
    for subgroup, mask in subgroup_masks.items():
        if np.sum(mask) > 0:
            fraction = (preds_arr[mask] == labels_arr[mask]).sum() / np.sum(mask)
            group_accuracies[subgroup] = fraction * 100
        else:
            group_accuracies[subgroup] = None  # Handle empty groups

    return loss_sum / len(loader), accuracy, fairness_metrics, group_accuracies

# Baseline run: train one network on the pooled training split and
# evaluate it on the test split after every epoch.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(X_train.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
epochs = 10

# Running sums of the per-epoch fairness gaps (divided by `epochs` below).
avg_dp = avg_eo_diff_tpr = avg_eo_diff_fpr = 0

for epoch in range(epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc, fairness_metrics, group_accuracies = evaluate(
        model, test_loader, criterion, device
    )

    print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}% "
          f"- Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%\n")

    # print("Subgroup Accuracies:")
    # for group, acc in group_accuracies.items():
    #     print(f"{group}: {acc:.2f}%" if acc is not None else f"{group}: No samples in batch")

    # A missing metric contributes 0 to the running sum.
    avg_dp = avg_dp + fairness_metrics.get('Demographic Parity', 0)
    avg_eo_diff_tpr = avg_eo_diff_tpr + fairness_metrics.get('Equalized Odds (TPR Difference)', 0)
    avg_eo_diff_fpr = avg_eo_diff_fpr + fairness_metrics.get('Equalized Odds (FPR Difference)', 0)

# Report the fairness gaps averaged over all epochs
print("\nFinal Fairness Metrics:")
for metric_label, metric_sum in (
    ("Demographic Parity", avg_dp),
    ("Equalized Odds (TPR Difference)", avg_eo_diff_tpr),
    ("Equalized Odds (FPR Difference)", avg_eo_diff_fpr),
):
    print(f"Average {metric_label} = {metric_sum / epochs:.4f}")



Epoch [1/10] - Train Loss: 0.3445, Train Accuracy: 83.96% - Test Loss: 0.3295, Test Accuracy: 84.92%

Epoch [2/10] - Train Loss: 0.3202, Train Accuracy: 85.08% - Test Loss: 0.3226, Test Accuracy: 85.33%

Epoch [3/10] - Train Loss: 0.3131, Train Accuracy: 85.55% - Test Loss: 0.3210, Test Accuracy: 85.52%

Epoch [4/10] - Train Loss: 0.3082, Train Accuracy: 85.64% - Test Loss: 0.3271, Test Accuracy: 85.20%

Epoch [5/10] - Train Loss: 0.3067, Train Accuracy: 85.77% - Test Loss: 0.3239, Test Accuracy: 85.10%

Epoch [6/10] - Train Loss: 0.3045, Train Accuracy: 85.91% - Test Loss: 0.3179, Test Accuracy: 85.31%

Epoch [7/10] - Train Loss: 0.3024, Train Accuracy: 85.77% - Test Loss: 0.3193, Test Accuracy: 85.10%

Epoch [8/10] - Train Loss: 0.3003, Train Accuracy: 86.11% - Test Loss: 0.3224, Test Accuracy: 85.47%

Epoch [9/10] - Train Loss: 0.2981, Train Accuracy: 86.17% - Test Loss: 0.3255, Test Accuracy: 85.13%

Epoch [10/10] - Train Loss: 0.2943, Train Accuracy: 86.14% - Test Loss: 0.3233, Te

The results above are for a plain neural network trained on the whole dataset (males and females together).



## Training the NN sequentially on Task A (White Male), Task B (White Female), Task C (Black Male) and Task D (Black Female) with the EWC loss function

In [None]:
# Slice the one-hot encoded frame into the four race x gender subgroups.
# 'sex_ Male' == 0 is used as the Female indicator.
white_male_data = data.loc[data['race_ White'].eq(1) & data['sex_ Male'].eq(1)]
white_female_data = data.loc[data['race_ White'].eq(1) & data['sex_ Male'].eq(0)]
black_male_data = data.loc[data['race_ Black'].eq(1) & data['sex_ Male'].eq(1)]
black_female_data = data.loc[data['race_ Black'].eq(1) & data['sex_ Male'].eq(0)]

# Function to process each group
def process_group(group_data, scaler=None):
    """Split one subgroup frame into scaled train/test tensors.

    Args:
        group_data: One-hot encoded DataFrame containing the
            'income_ <=50K' and 'income_ >50K' columns.
        scaler: Optional already-fitted scaler exposing ``transform``. When
            given, it is applied to both splits unchanged, so the group's
            features share the scale of whatever data that scaler was fitted
            on (e.g. the set later used for evaluation). When omitted, a new
            ``StandardScaler`` is fitted on this group's training split only.

    Returns:
        [X_train, X_test, y_train, y_test] as tensors (float32 features,
        long labels), using an 80/20 split with ``random_state=42``.
    """
    features = group_data.drop(['income_ <=50K', 'income_ >50K'], axis=1).values
    labels = group_data['income_ >50K'].values  # 1 if '>50K', 0 if '<=50K'

    # Split first so that the test rows never influence the scaling statistics.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    if scaler is None:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
    else:
        X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return [
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
        torch.tensor(y_test, dtype=torch.long),
    ]

# Build train/test tensors for each of the four subgroups
(X_white_male_train, X_white_male_test,
 y_white_male_train, y_white_male_test) = process_group(white_male_data)
(X_white_female_train, X_white_female_test,
 y_white_female_train, y_white_female_test) = process_group(white_female_data)
(X_black_male_train, X_black_male_test,
 y_black_male_train, y_black_male_test) = process_group(black_male_data)
(X_black_female_train, X_black_female_test,
 y_black_female_train, y_black_female_test) = process_group(black_female_data)


In [None]:
# Sample counts, one per line: the four training splits first, then the four test splits.
print(
    *(len(split) for split in (
        X_white_male_train, X_white_female_train, X_black_male_train, X_black_female_train,
        X_white_male_test, X_white_female_test, X_black_male_test, X_black_female_test,
    )),
    sep="\n",
)

15339
6913
1255
1244
3835
1729
314
311


In [None]:
# One dataset per subgroup (tasks A-D). The sensitive attributes are constant
# within a group, so they are filled in as all-ones / all-zeros arrays:
# gender 1 = Male, 0 = Female; race 1 = White, 0 = Black.
train_dataset_A = AdultDataset(  # White Male
    X_white_male_train,
    y_white_male_train,
    np.ones(len(y_white_male_train)),
    np.ones(len(y_white_male_train)),
)
train_dataset_B = AdultDataset(  # White Female
    X_white_female_train,
    y_white_female_train,
    np.zeros(len(y_white_female_train)),
    np.ones(len(y_white_female_train)),
)
train_dataset_C = AdultDataset(  # Black Male
    X_black_male_train,
    y_black_male_train,
    np.ones(len(y_black_male_train)),
    np.zeros(len(y_black_male_train)),
)
train_dataset_D = AdultDataset(  # Black Female
    X_black_female_train,
    y_black_female_train,
    np.zeros(len(y_black_female_train)),
    np.zeros(len(y_black_female_train)),
)

# Shuffled mini-batch loaders, one per task
train_loader_A = DataLoader(dataset=train_dataset_A, shuffle=True, batch_size=64)
train_loader_B = DataLoader(dataset=train_dataset_B, shuffle=True, batch_size=64)
train_loader_C = DataLoader(dataset=train_dataset_C, shuffle=True, batch_size=64)
train_loader_D = DataLoader(dataset=train_dataset_D, shuffle=True, batch_size=64)


In [None]:
# Task A (White Male): fresh network, loss and optimizer
model = NeuralNet(X_white_male_train.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
epochs = 10

# Plain (non-EWC) training on Task A
for epoch in range(epochs):
    train_loss, train_acc = train(model, train_loader_A, criterion, optimizer, device)
    print(
        f"Epoch [{epoch+1}/{epochs}] - Train Loss: {train_loss:.4f}, "
        f"Train Accuracy: {train_acc:.2f}% (Task A: White Male)"
    )

# Snapshot of the trainable parameters after Task A; copies are detached so
# later training cannot modify them.
old_params_A = {
    param_name: tensor.detach().clone()
    for param_name, tensor in model.named_parameters()
    if tensor.requires_grad
}


Epoch [1/10] - Train Loss: 0.4275, Train Accuracy: 79.11% (Task A: White Male)
Epoch [2/10] - Train Loss: 0.3974, Train Accuracy: 81.04% (Task A: White Male)
Epoch [3/10] - Train Loss: 0.3917, Train Accuracy: 81.35% (Task A: White Male)
Epoch [4/10] - Train Loss: 0.3872, Train Accuracy: 81.31% (Task A: White Male)
Epoch [5/10] - Train Loss: 0.3842, Train Accuracy: 81.85% (Task A: White Male)
Epoch [6/10] - Train Loss: 0.3792, Train Accuracy: 81.69% (Task A: White Male)
Epoch [7/10] - Train Loss: 0.3764, Train Accuracy: 82.16% (Task A: White Male)
Epoch [8/10] - Train Loss: 0.3740, Train Accuracy: 82.21% (Task A: White Male)
Epoch [9/10] - Train Loss: 0.3742, Train Accuracy: 82.04% (Task A: White Male)
Epoch [10/10] - Train Loss: 0.3723, Train Accuracy: 82.18% (Task A: White Male)


In [None]:
def calculate_fisher_information(model, dataloader, criterion, device):
    """Estimate a diagonal Fisher information term for every trainable parameter.

    For each batch the squared gradient of the batch loss is accumulated,
    weighted by the batch size, and the sum is divided by the total number
    of samples. Note that this squares the gradient of the *batch-averaged*
    loss, which is a batch-level approximation rather than an average of
    per-sample squared gradients.

    Args:
        model: The network whose parameters are scored.
        dataloader: Iterable of batches with 2, 3 or 4 items; the first two
            are (inputs, labels), any remaining items are ignored.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        device: Device the inputs and labels are moved to.

    Returns:
        dict mapping parameter name to a tensor of the parameter's shape.
        All-zero tensors are returned when the dataloader yields no samples.

    Raises:
        ValueError: If a batch does not have 2, 3 or 4 items.
    """
    # Use evaluation mode so dropout is disabled and batch-norm running
    # statistics are not updated by this measurement pass; the previous mode
    # is restored afterwards.
    was_training = model.training
    model.eval()

    fisher_information = {
        name: torch.zeros_like(param)
        for name, param in model.named_parameters()
        if param.requires_grad
    }
    total_samples = 0

    try:
        for batch in dataloader:
            if len(batch) not in (2, 3, 4):
                raise ValueError(f"Unexpected batch format: {len(batch)} elements")

            # Only inputs and labels are needed; sensitive attributes are ignored.
            inputs, labels = batch[0].to(device), batch[1].to(device)

            model.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            batch_size = inputs.size(0)
            total_samples += batch_size

            for name, param in model.named_parameters():
                # A parameter that did not take part in the forward pass has no gradient.
                if param.requires_grad and param.grad is not None:
                    fisher_information[name] += param.grad.detach().pow(2) * batch_size
    finally:
        model.zero_grad()  # do not leave measurement gradients behind
        model.train(was_training)

    # Normalize the Fisher Information Matrix (skip when empty to avoid 0/0 = NaN)
    if total_samples > 0:
        for name in fisher_information:
            fisher_information[name] /= total_samples

    return fisher_information
# Parameter importance for Task A, measured on the Task A training loader
fisher_information_A = calculate_fisher_information(
    model=model,
    dataloader=train_loader_A,
    criterion=criterion,
    device=device,
)

In [None]:
def train_with_ewc(model, train_loader, criterion, optimizer, fisher_information, old_params, device,
                   ewc_lambda, max_epochs=10):
    """Train ``model`` with an Elastic Weight Consolidation penalty.

    The optimised objective per batch is
    ``criterion(outputs, labels) + ewc_lambda * sum_i F_i * (theta_i - theta_old_i)^2``.

    Args:
        model: Network to train (updated in place).
        train_loader: Iterable of batches with 2, 3 or 4 items; the first two
            are (inputs, labels), any remaining items are ignored.
        criterion: Task loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model parameters.
        fisher_information: dict of parameter name -> importance tensor.
        old_params: dict of parameter name -> anchor value from the earlier task.
        device: Device the inputs and labels are moved to.
        ewc_lambda: Weight of the EWC penalty.
        max_epochs: Number of passes over ``train_loader``.

    Returns:
        (train_loss, train_acc) of the LAST epoch only: the sample-weighted
        mean task loss (penalty excluded) and the accuracy in percent.
        (0.0, 0.0) is returned when no epoch ran or no sample was seen.

    Raises:
        ValueError: If a batch does not have 2, 3 or 4 items.
    """
    model.train()

    # Defined up front so the return statement is valid even for max_epochs=0.
    train_loss, train_acc = 0.0, 0.0

    for epoch in range(max_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for batch in train_loader:
            if len(batch) not in (2, 3, 4):
                raise ValueError(f"Unexpected batch format: {len(batch)} elements")

            inputs, labels = batch[0].to(device), batch[1].to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Compute EWC Loss. Anchors are looked up BY NAME: pairing the two
            # collections positionally silently mismatches tensors whenever
            # their ordering or filtering differs.
            ewc_loss = 0.0
            for name, param in model.named_parameters():
                if name in fisher_information and name in old_params:
                    drift = (param - old_params[name]).pow(2)
                    ewc_loss = ewc_loss + (fisher_information[name] * drift).sum()

            total_loss = loss + (ewc_lambda * ewc_loss)
            total_loss.backward()
            optimizer.step()

            # Track the task loss only, weighted by batch size.
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        if total > 0:
            train_loss = running_loss / total
            train_acc = 100 * correct / total

    return train_loss, train_acc



In [None]:
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating it.

    This cell redefines ``evaluate`` with the same logic as the earlier
    definition in this notebook.

    Batches may carry 4 items (inputs, labels, gender, race), 3 items
    (inputs, labels, one sensitive attribute, stored as gender) or 2 items
    (inputs, labels). Any other size raises ``ValueError``.

    Returns:
        (mean_batch_loss, accuracy_percent, fairness_metrics, group_accuracies).
        The last two are empty dicts unless both gender and race values were
        collected; otherwise ``group_accuracies`` maps each race x gender
        subgroup to its accuracy in percent (``None`` for an empty subgroup).
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    pred_buf, label_buf = [], []
    gender_buf, race_buf = [], []

    with torch.no_grad():
        for batch in loader:
            n_fields = len(batch)
            if n_fields == 4:
                inputs, labels, gender, race = batch
                gender_buf.extend(gender.cpu().numpy())
                race_buf.extend(race.cpu().numpy())
            elif n_fields == 3:
                inputs, labels, sensitive = batch
                gender_buf.extend(sensitive.cpu().numpy())  # Assuming gender in this case
            elif n_fields == 2:
                inputs, labels = batch
            else:
                raise ValueError(f"Unexpected batch format with {len(batch)} elements.")

            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            loss_sum += criterion(logits, labels).item()

            predicted = torch.max(logits, 1)[1]
            n_correct += (predicted == labels).sum().item()
            n_seen += labels.size(0)

            pred_buf.extend(predicted.cpu().numpy())
            label_buf.extend(labels.cpu().numpy())

    accuracy = 100 * n_correct / n_seen

    # Fairness needs both sensitive attributes; otherwise report nothing.
    if not (gender_buf and race_buf):
        return loss_sum / len(loader), accuracy, {}, {}

    fairness_metrics = calculate_fairness_metrics(label_buf, pred_buf, race_buf, gender_buf)

    # Per-subgroup accuracy
    preds_arr = np.array(pred_buf)
    labels_arr = np.array(label_buf)
    race_arr = np.array(race_buf)
    gender_arr = np.array(gender_buf)

    is_white, is_black = race_arr == 1, race_arr == 0
    is_male, is_female = gender_arr == 1, gender_arr == 0
    subgroup_masks = {
        "White Male": is_white & is_male,
        "White Female": is_white & is_female,
        "Black Male": is_black & is_male,
        "Black Female": is_black & is_female,
    }

    group_accuracies = {}
    for subgroup, mask in subgroup_masks.items():
        if np.sum(mask) > 0:
            fraction = (preds_arr[mask] == labels_arr[mask]).sum() / np.sum(mask)
            group_accuracies[subgroup] = fraction * 100
        else:
            group_accuracies[subgroup] = None  # Handle empty groups

    return loss_sum / len(loader), accuracy, fairness_metrics, group_accuracies


In [None]:
# Define EWC regularization strength
ewc_lambda = 100
# Number of outer repetitions per task; each train_with_ewc call additionally
# runs its own max_epochs passes over the loader.
epochs = 1


def _accumulate_fisher(previous, current):
    """Return the per-parameter sum of two Fisher dictionaries.

    Summing keeps the importance measured on earlier tasks alive, so a later
    task is still penalised for moving weights that mattered to any earlier
    one, instead of only the task trained immediately before it.
    """
    return {
        name: values + previous[name] if name in previous else values.clone()
        for name, values in current.items()
    }


# Train sequentially on Task B (White Female), anchored to Task A
for epoch in range(epochs):
    train_loss, train_acc = train_with_ewc(model, train_loader_B, criterion, optimizer,
                                           fisher_information_A, old_params_A, device, ewc_lambda)
# Update Fisher Information and Parameters for Task B.
# fisher_information_B holds the importance accumulated over tasks A and B.
fisher_information_B = _accumulate_fisher(
    fisher_information_A,
    calculate_fisher_information(model, train_loader_B, criterion, device),
)
old_params_B = {name: param.clone().detach() for name, param in model.named_parameters() if param.requires_grad}

# Train sequentially on Task C (Black Male), anchored to the post-B weights
for epoch in range(epochs):
    train_loss, train_acc = train_with_ewc(model, train_loader_C, criterion, optimizer,
                                           fisher_information_B, old_params_B, device, ewc_lambda)
# Update Fisher Information and Parameters for Task C.
# fisher_information_C holds the importance accumulated over tasks A, B and C.
fisher_information_C = _accumulate_fisher(
    fisher_information_B,
    calculate_fisher_information(model, train_loader_C, criterion, device),
)
old_params_C = {name: param.clone().detach() for name, param in model.named_parameters() if param.requires_grad}

# Train sequentially on Task D (Black Female), anchored to the post-C weights
for epoch in range(epochs):
    train_loss, train_acc = train_with_ewc(model, train_loader_D, criterion, optimizer,
                                           fisher_information_C, old_params_C, device, ewc_lambda)

# Final evaluation after all tasks, on the pooled test loader
test_loss, test_acc, fairness_metrics, group_accuracies = evaluate(model, test_loader, criterion, device)

# Print final accuracy after training on all tasks
print(f"\nTraining completed for Task D (Black Female) with EWC.")
print(f"Final Test Accuracy: {test_acc:.2f}%")

# Display subgroup accuracies
print("\nSubgroup Accuracies:")
for group, acc in group_accuracies.items():
    print(f"{group}: {acc:.2f}%" if acc is not None else f"{group}: No samples in batch")

# Display fairness metrics after all tasks
print("\nFairness Metrics after training on all tasks (A → B → C → D):")
print(f"Demographic Parity: {fairness_metrics['Demographic Parity']:.4f}")
print(f"Equalized Odds (TPR Difference): {fairness_metrics['Equalized Odds (TPR Difference)']:.4f}")
print(f"Equalized Odds (FPR Difference): {fairness_metrics['Equalized Odds (FPR Difference)']:.4f}")



Training completed for Task D (Black Female) with EWC.
Final Test Accuracy: 81.95%

Subgroup Accuracies:
White Male: 76.84%
White Female: 90.41%
Black Male: 82.01%
Black Female: 96.51%

Fairness Metrics after training on all tasks (A → B → C → D):
Demographic Parity: 0.1158
Equalized Odds (TPR Difference): 0.1212
Equalized Odds (FPR Difference): 0.0251
