<a href="https://colab.research.google.com/github/maverick-0215/Fairness-via-Continual-Learning/blob/main/Testing_Fairness_with_EWC_on_COMPAS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim

import torch.nn.functional as F
from torchvision import datasets, transforms

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch

# Load COMPAS dataset
url = "https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv"
data = pd.read_csv(url)

# Drop identifying columns
cols_to_drop = ['id', 'name', 'first', 'last', 'compas_screening_date', 'dob', 'age_cat',
                'c_jail_in', 'c_jail_out', 'c_case_number', 'c_offense_date', 'c_arrest_date',
                'c_days_from_compas', 'c_charge_desc', 'r_charge_desc', 'r_case_number',
                'r_offense_date', 'r_charge_degree', 'r_days_from_arrest', 'violent_recid',
                'screening_date', 'v_type_of_assessment', 'v_decile_score', 'v_score_text',
                'decile_score.1', 'type_of_assessment', 'assessment_reason', 'score_text',
                'decile_score', 'start', 'end']
data.drop(columns=[col for col in cols_to_drop if col in data.columns], inplace=True)

# Filter rows
data = data[abs(data['days_b_screening_arrest']) <= 30]
data = data[data['is_recid'] != -1]
data = data[data['c_charge_degree'] != 'O']
data = data[data['race'].isin(['African-American', 'Caucasian'])]  # Only Black and White

# Apply One-Hot Encoding to categorical columns
data = pd.get_dummies(data, columns=data.select_dtypes(include=['object']).columns)

# Extract target column
y = data['two_year_recid'].values

# Drop target column from features
X = data.drop(columns=['two_year_recid'])

# Standardize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = torch.tensor(y, dtype=torch.long)

# Extract sensitive attributes
gender_attr = data['sex_Male'].values   # 1 for Male, 0 for Female
race_attr = data['race_Caucasian'].values  # 1 for White, 0 for Black

# Split into train and test sets
X_train, X_test, y_train, y_test, gender_train, gender_test, race_train, race_test = train_test_split(
    X_tensor, y_tensor, gender_attr, race_attr, test_size=0.2, random_state=42
)


In [None]:
print(X_train.shape)

torch.Size([4222, 5725])


In [None]:
data

Unnamed: 0,age,juv_fel_count,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,is_recid,is_violent_recid,priors_count.1,event,...,out_custody_2016-03-28,out_custody_2016-03-29,out_custody_2016-03-30,out_custody_2016-04-01,out_custody_2016-04-04,out_custody_2016-04-05,out_custody_2016-04-08,out_custody_2016-04-09,out_custody_2016-04-18,out_custody_2020-01-01
1,34,0,0,0,0,-1.0,1,1,0,1,...,False,False,False,False,False,False,False,False,False,False
2,24,0,0,1,4,-1.0,1,0,4,0,...,False,False,False,False,False,False,False,False,False,False
6,41,0,0,0,14,-1.0,1,0,14,1,...,False,False,False,False,False,False,False,False,False,False
8,39,0,0,0,0,-1.0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
10,27,0,0,0,0,-1.0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7207,30,0,0,0,0,-1.0,1,0,0,1,...,False,False,False,False,False,False,False,False,False,False
7208,20,0,0,0,0,-1.0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
7209,23,0,0,0,0,-1.0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False
7210,23,0,0,0,0,-1.0,0,0,0,0,...,False,False,False,False,False,False,False,False,False,False


In [None]:
from torch.utils.data import Dataset, DataLoader

class CompasDataset(Dataset):
    def __init__(self, X, y, gender_attrs, race_attrs):
        self.X = X
        self.y = y
        self.gender_attrs = gender_attrs
        self.race_attrs = race_attrs

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        return self.X[index], self.y[index], self.gender_attrs[index], self.race_attrs[index]


# Create DataLoaders
train_dataset = CompasDataset(X_train, y_train, gender_train, race_train)
test_dataset = CompasDataset(X_test, y_test, gender_test, race_test)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)


In [None]:
class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2)  # Output is 2 because it's a binary classification problem
        )

    def forward(self, x):
        return self.model(x)
def train(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for inputs, labels, _,_ in loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    accuracy = 100 * correct / total
    return total_loss / len(loader), accuracy

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

def calculate_fairness_metrics(true_labels, preds, race_attrs, gender_attrs):
    metrics = {}

    # Convert to numpy arrays
    true_labels = np.array(true_labels)
    preds = np.array(preds)
    race_attrs = np.array(race_attrs)  # 1 = White, 0 = Black
    gender_attrs = np.array(gender_attrs)  # 1 = Male, 0 = Female

    # Group definitions for White Male, White Female, Black Male, Black Female
    groups = {
        "White Male": (race_attrs == 1) & (gender_attrs == 1),
        "White Female": (race_attrs == 1) & (gender_attrs == 0),
        "Black Male": (race_attrs == 0) & (gender_attrs == 1),
        "Black Female": (race_attrs == 0) & (gender_attrs == 0),
    }

    pos_rates = {}
    tpr_rates = {}
    fpr_rates = {}

    for group_name, group_idx in groups.items():
        if np.sum(group_idx) == 0:
            continue  # Skip groups with no samples

        # Positive rate (percentage of positives in the group)
        pos_rate = np.mean(preds[group_idx])

        # Confusion matrix for the group
        cm = confusion_matrix(true_labels[group_idx], preds[group_idx], labels=[0, 1])

        # True Positive Rate (TPR) and False Positive Rate (FPR)
        tpr = cm[1, 1] / (cm[1, 1] + cm[1, 0]) if (cm[1, 1] + cm[1, 0]) > 0 else 0
        fpr = cm[0, 1] / (cm[0, 1] + cm[0, 0]) if (cm[0, 1] + cm[0, 0]) > 0 else 0

        pos_rates[group_name] = pos_rate
        tpr_rates[group_name] = tpr
        fpr_rates[group_name] = fpr

    # Compute fairness metrics
    metrics["Demographic Parity"] = max(pos_rates.values()) - min(pos_rates.values())
    metrics["Equalized Odds (TPR Difference)"] = max(tpr_rates.values()) - min(tpr_rates.values())
    metrics["Equalized Odds (FPR Difference)"] = max(fpr_rates.values()) - min(fpr_rates.values())

    return metrics


In [None]:
import torch
import numpy as np
from sklearn.metrics import confusion_matrix

def evaluate(model, loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    all_preds = []
    all_labels = []
    all_sensitive_attrs_gender = []
    all_sensitive_attrs_race = []

    with torch.no_grad():
        for batch in loader:
            if len(batch) == 4:
                inputs, labels, gender, race = batch
                all_sensitive_attrs_gender.extend(gender.cpu().numpy())
                all_sensitive_attrs_race.extend(race.cpu().numpy())
            elif len(batch) == 3:
                inputs, labels, sensitive = batch
                all_sensitive_attrs_gender.extend(sensitive.cpu().numpy())  # Assuming gender in this case
            elif len(batch) == 2:
                inputs, labels = batch
            else:
                raise ValueError(f"Unexpected batch format with {len(batch)} elements.")

            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = 100 * correct / total

    # Compute fairness metrics and per-group accuracy
    if all_sensitive_attrs_gender and all_sensitive_attrs_race:
        fairness_metrics = calculate_fairness_metrics(
            all_labels, all_preds, all_sensitive_attrs_race, all_sensitive_attrs_gender
        )

        # Compute accuracy for each subgroup
        all_preds = np.array(all_preds)
        all_labels = np.array(all_labels)
        all_sensitive_attrs_race = np.array(all_sensitive_attrs_race)
        all_sensitive_attrs_gender = np.array(all_sensitive_attrs_gender)

        groups = {
            "White Male": (all_sensitive_attrs_race == 1) & (all_sensitive_attrs_gender == 1),
            "White Female": (all_sensitive_attrs_race == 1) & (all_sensitive_attrs_gender == 0),
            "Black Male": (all_sensitive_attrs_race == 0) & (all_sensitive_attrs_gender == 1),
            "Black Female": (all_sensitive_attrs_race == 0) & (all_sensitive_attrs_gender == 0),
        }

        group_accuracies = {}
        for group_name, group_idx in groups.items():
            if np.sum(group_idx) > 0:
                group_acc = (all_preds[group_idx] == all_labels[group_idx]).sum() / np.sum(group_idx)
                group_accuracies[group_name] = group_acc * 100
            else:
                group_accuracies[group_name] = None  # Handle empty groups

    else:
        fairness_metrics = {}
        group_accuracies = {}

    return total_loss / len(loader), accuracy, fairness_metrics, group_accuracies


# Training and Evaluation Loop
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size=X_train.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 10

avg_dp = 0
avg_eo_diff_tpr = 0
avg_eo_diff_fpr = 0

for epoch in range(epochs):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    test_loss, test_acc, fairness_metrics, group_accuracies = evaluate(model, test_loader, criterion, device)

    print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}% "
          f"- Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}%\n")

    # # Print Subgroup Accuracies
    # print("Subgroup Accuracies:")
    # for group, acc in group_accuracies.items():
    #     print(f"{group}: {acc:.2f}%" if acc is not None else f"{group}: No samples in batch")

    avg_dp += fairness_metrics.get('Demographic Parity', 0)
    avg_eo_diff_tpr += fairness_metrics.get('Equalized Odds (TPR Difference)', 0)
    avg_eo_diff_fpr += fairness_metrics.get('Equalized Odds (FPR Difference)', 0)

# Print Final Fairness Metrics
print("\nFinal Fairness Metrics:")
print(f"Average Demographic Parity = {avg_dp / epochs:.4f}")
print(f"Average Equalized Odds (TPR Difference) = {avg_eo_diff_tpr / epochs:.4f}")
print(f"Average Equalized Odds (FPR Difference) = {avg_eo_diff_fpr / epochs:.4f}")


Epoch [1/10] - Train Loss: 0.4442, Train Accuracy: 83.33% - Test Loss: 0.3378, Test Accuracy: 87.59%

Epoch [2/10] - Train Loss: 0.1689, Train Accuracy: 93.79% - Test Loss: 0.2967, Test Accuracy: 89.30%

Epoch [3/10] - Train Loss: 0.0484, Train Accuracy: 97.96% - Test Loss: 0.3319, Test Accuracy: 90.25%

Epoch [4/10] - Train Loss: 0.0259, Train Accuracy: 99.22% - Test Loss: 0.3677, Test Accuracy: 90.62%

Epoch [5/10] - Train Loss: 0.0235, Train Accuracy: 99.31% - Test Loss: 0.3555, Test Accuracy: 88.35%

Epoch [6/10] - Train Loss: 0.0181, Train Accuracy: 99.55% - Test Loss: 0.3551, Test Accuracy: 87.69%

Epoch [7/10] - Train Loss: 0.0097, Train Accuracy: 99.72% - Test Loss: 0.3883, Test Accuracy: 88.16%

Epoch [8/10] - Train Loss: 0.0039, Train Accuracy: 99.98% - Test Loss: 0.3823, Test Accuracy: 88.54%

Epoch [9/10] - Train Loss: 0.0014, Train Accuracy: 100.00% - Test Loss: 0.3848, Test Accuracy: 89.49%

Epoch [10/10] - Train Loss: 0.0007, Train Accuracy: 100.00% - Test Loss: 0.3931, 

The above results are just for normal nn trained on the whole data(males + females)



## Training nn with Task A(only male data) and Task B(only female data) with EWC loss function.

In [None]:
# Split into four subgroups
white_male = data[(data['sex_Male'] == 1) & (data['race_Caucasian'] == 1)]
white_female = data[(data['sex_Male'] == 0) & (data['race_Caucasian'] == 1)]
black_male = data[(data['sex_Male'] == 1) & (data['race_Caucasian'] == 0)]
black_female = data[(data['sex_Male'] == 0) & (data['race_Caucasian'] == 0)]

# Function to process each group
def process_group(group_data):
    y = group_data['is_recid'].values
    X = group_data.drop(['is_recid'], axis=1)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    return train_test_split(X_tensor, y_tensor, test_size=0.2)

# Apply processing to each group
X_wm_train, X_wm_test, y_wm_train, y_wm_test = process_group(white_male)
X_wf_train, X_wf_test, y_wf_train, y_wf_test = process_group(white_female)
X_bm_train, X_bm_test, y_bm_train, y_bm_test = process_group(black_male)
X_bf_train, X_bf_test, y_bf_train, y_bf_test = process_group(black_female)


In [None]:
print("X_wmale_train shape:", X_wm_train.shape)
print("X_wfemale_train shape:", X_wf_train.shape)
print("X_bfemale_train shape:", X_bm_train.shape)
print("X_bfemale_train shape:", X_bf_train.shape)



X_wmale_train shape: torch.Size([1296, 5725])
X_wfemale_train shape: torch.Size([385, 5725])
X_bfemale_train shape: torch.Size([2100, 5725])
X_bfemale_train shape: torch.Size([439, 5725])


In [None]:
# Creating DataLoaders for the four groups
train_dataset_A = CompasDataset(
    X_wm_train, y_wm_train,
    gender_attrs=np.ones(len(y_wm_train)),  # Male = 1
    race_attrs=np.ones(len(y_wm_train))  # White = 1
)

train_dataset_B = CompasDataset(
    X_wf_train, y_wf_train,
    gender_attrs=np.zeros(len(y_wf_train)),  # Female = 0
    race_attrs=np.ones(len(y_wf_train))  # White = 1
)

train_dataset_C = CompasDataset(
    X_bm_train, y_bm_train,
    gender_attrs=np.ones(len(y_bm_train)),  # Male = 1
    race_attrs=np.zeros(len(y_bm_train))  # Black = 0
)

train_dataset_D = CompasDataset(
    X_bf_train, y_bf_train,
    gender_attrs=np.zeros(len(y_bf_train)),  # Female = 0
    race_attrs=np.zeros(len(y_bf_train))  # Black = 0
)

# Creating DataLoaders
train_loader_A = DataLoader(train_dataset_A, batch_size=64, shuffle=True)
train_loader_B = DataLoader(train_dataset_B, batch_size=64, shuffle=True)
train_loader_C = DataLoader(train_dataset_C, batch_size=64, shuffle=True)
train_loader_D = DataLoader(train_dataset_D, batch_size=64, shuffle=True)


In [None]:
# Define model, loss, and optimizer for Task A (White Male - Group 0)
model = NeuralNet(input_size=X_wm_train.shape[1]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 2

# Train on Task A (White Male)
for epoch in range(epochs):
    train_loss, train_acc = train(model, train_loader_A, criterion, optimizer, device)
    print(f"Epoch [{epoch+1}/{epochs}] - Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}% (Task A: White Male)")

# Save model parameters after Task A
old_params_A = {
    name: param.clone().detach()
    for name, param in model.named_parameters()
    if param.requires_grad
}


Epoch [1/2] - Train Loss: 0.6112, Train Accuracy: 73.30% (Task A: White Male)
Epoch [2/2] - Train Loss: 0.2645, Train Accuracy: 92.98% (Task A: White Male)


In [None]:
def calculate_fisher_information(model, dataloader, criterion, device):
    model.train()  # Prevents issues with batch norm and dropout
    fisher_information = {name: torch.zeros_like(param) for name, param in model.named_parameters() if param.requires_grad}

    total_samples = 0

    for batch in dataloader:
        if len(batch) == 3:  # (inputs, labels, sensitive_attrs)
            inputs, labels, _ = batch
        elif len(batch) == 4:  # (inputs, labels, sensitive_attrs, race_attrs)
            inputs, labels, _, _ = batch  # Ignore last two values
        else:
            raise ValueError(f"Unexpected batch format: {len(batch)} elements")

        inputs, labels = inputs.to(device), labels.to(device)

        model.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()

        batch_size = inputs.size(0)
        total_samples += batch_size

        for name, param in model.named_parameters():
            if param.requires_grad:
                fisher_information[name] += (param.grad.pow(2) * batch_size).detach()

    # Normalize the Fisher Information Matrix
    for name in fisher_information:
        fisher_information[name] /= total_samples

    return fisher_information
fisher_information_A = calculate_fisher_information(model, train_loader_A, criterion, device)

In [None]:
def train_with_ewc(model, train_loader, criterion, optimizer, fisher_information, old_params, device,
                   ewc_lambda, max_epochs=10):
    model.train()

    for epoch in range(max_epochs):
        running_loss = 0.0
        correct = 0
        total = 0

        for batch in train_loader:
            if len(batch) == 3:
                inputs, labels, _ = batch
            elif len(batch) == 4:
                inputs, labels, _, _ = batch
            else:
                raise ValueError(f"Unexpected batch format: {len(batch)} elements")

            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Compute EWC Loss
            ewc_loss = 0.0
            for (name, param), (_, old_param) in zip(model.named_parameters(), old_params.items()):
                if name in fisher_information:
                    ewc_loss += (fisher_information[name] * (param - old_param).pow(2)).sum()

            total_loss = loss + (ewc_lambda * ewc_loss)
            total_loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / total
        train_acc = 100 * correct / total

    return train_loss, train_acc



In [None]:
# Define EWC regularization strength
ewc_lambda = 5
epochs = 1


for epoch in range(epochs):
    train_loss, train_acc = train_with_ewc(model, train_loader_B, criterion, optimizer,
                                           fisher_information_A, old_params_A, device, ewc_lambda)
# Update Fisher Information and Parameters for Task B
fisher_information_B = calculate_fisher_information(model, train_loader_B, criterion, device)
old_params_B = {name: param.clone().detach() for name, param in model.named_parameters() if param.requires_grad}

# Train sequentially on Task C (Black Male)
for epoch in range(epochs):
    train_loss, train_acc = train_with_ewc(model, train_loader_C, criterion, optimizer,
                                           fisher_information_B, old_params_B, device, ewc_lambda)
# Update Fisher Information and Parameters for Task C
fisher_information_C = calculate_fisher_information(model, train_loader_C, criterion, device)
old_params_C = {name: param.clone().detach() for name, param in model.named_parameters() if param.requires_grad}

# Train sequentially on Task D (Black Female)
for epoch in range(epochs):
    train_loss, train_acc = train_with_ewc(model, train_loader_D, criterion, optimizer,
                                           fisher_information_C, old_params_C, device, ewc_lambda)



# Final evaluation after all tasks
test_loss, test_acc, fairness_metrics, group_accuracies = evaluate(model, test_loader, criterion, device)

# Print final accuracy after training on all tasks
print(f"\nTraining completed for Task B, Female with EWC.")
print(f"Final Test Accuracy: {test_acc:.2f}%")

# Display subgroup accuracies
print("\nSubgroup Accuracies:")
for group, acc in group_accuracies.items():
    print(f"{group}: {acc:.2f}%" if acc is not None else f"{group}: No samples in batch")

# Display fairness metrics after all tasks
print("\nFairness Metrics after training on all tasks (A → B ):")
print(f"Demographic Parity: {fairness_metrics['Demographic Parity']:.4f}")
print(f"Equalized Odds (TPR Difference): {fairness_metrics['Equalized Odds (TPR Difference)']:.4f}")
print(f"Equalized Odds (FPR Difference): {fairness_metrics['Equalized Odds (FPR Difference)']:.4f}")



Training completed for Task B, Female with EWC.
Final Test Accuracy: 92.71%

Subgroup Accuracies:
White Male: 89.84%
White Female: 94.90%
Black Male: 92.84%
Black Female: 97.62%

Fairness Metrics after training on all tasks (A → B ):
Demographic Parity: 0.2416
Equalized Odds (TPR Difference): 0.1116
Equalized Odds (FPR Difference): 0.0872
