<a href="https://colab.research.google.com/github/taweener11/darkSideUnmasked/blob/main/clean_gender_celeba.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive into the runtime so that the CelebA archive and annotation
# files read from "/content/drive/My Drive/..." by later cells are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:

import os

# Number of logical CPUs visible to this runtime; os.cpu_count() may return None
# when the count cannot be determined. The bare `cores` below displays the value.
cores = os.cpu_count() # Count the number of cores in a computer
cores

8

In [3]:

#@title shell pipeline for unzipping! this needs to run every time

!unzip -q "/content/drive/My Drive/Datasets/celeba/img_align_celeba.zip" -d "/content/celeba/"

In [4]:
# Local (runtime) data root; the CelebA loaders below use the same path as root.
# NOTE(review): the sanity-check cell further down rebinds data_dir to '/content/celeba'.
data_dir = '/content' # setting it to the local environment

In [5]:
import torch
from torchvision import datasets, transforms

In [6]:
# Preprocessing pipeline: a deliberately small input resolution (suggested by Rasmus).

image_size = 64

# Resize the short side to image_size, crop the central square, convert to a
# tensor, then normalise every channel with mean 0.5 / std 0.5.
# NOTE(review): with ToTensor producing values in [0, 1], this normalisation
# yields inputs in [-1, 1], while the PGD/TRADES code in this notebook clamps
# adversarial images to [0, 1] -- confirm which range is intended.
transform = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [7]:
# Copy the CelebA annotation files from Google Drive to the local runtime so they
# do not have to be uploaded manually on every session.
import os
import shutil

drive_celeba_dir = '/content/drive/My Drive/Datasets/celeba'
local_celeba_dir = '/content/celeba'

annotation_files = [
    'identity_CelebA.txt',
    'list_attr_celeba.txt',
    'list_bbox_celeba.txt',
    'list_landmarks_align_celeba.txt',
    'list_eval_partition.txt',
]

# The unzip cell normally creates this folder; create it if that cell was skipped.
os.makedirs(local_celeba_dir, exist_ok=True)

for annotation_file in annotation_files:
    shutil.copyfile(
        f'{drive_celeba_dir}/{annotation_file}',
        f'{local_celeba_dir}/{annotation_file}',
    )



'/content/celeba/list_eval_partition.txt'

In [8]:
from torchvision.datasets import CelebA


# Training split of CelebA with the attribute vector as the target.
# download=False: the images and annotation files must already sit under
# <root>/celeba/ (here /content/celeba/), which the previous cells take care of.
try:
    dataset = CelebA(
        root='/content',
        split='train',
        target_type='attr',
        transform=transform,
        download=False # this works now!!!! its just important that it is in the root folder
    )
except Exception as e:
    # Report the problem, then re-raise: every later cell needs `dataset`, so
    # swallowing the error here would only resurface as a confusing NameError.
    print("CelebA error:", e)
    raise

In [None]:
#@title sanity check

import os

# Verify that the local CelebA folder has the layout the loader expects.
data_dir = '/content/celeba'
_images_dir = os.path.join(data_dir, 'img_align_celeba')
_attr_file = os.path.join(data_dir, 'list_attr_celeba.txt')

print("Root contents:", os.listdir(data_dir))
print("Images folder exists:", os.path.isdir(_images_dir))
print("Sample images:", os.listdir(_images_dir)[:3])
print("Has attribute file:", os.path.isfile(_attr_file))

Root contents: ['img_align_celeba', 'list_bbox_celeba.txt', 'identity_CelebA.txt', 'list_landmarks_align_celeba.txt', 'list_attr_celeba.txt', 'list_eval_partition.txt']
Images folder exists: True
Sample images: ['053361.jpg', '189109.jpg', '130880.jpg']
Has attribute file: True


In [None]:
#@title sanity check 2 & the moment of truth!!

# Wrap the full training split in a shuffled loader with batches of 32 images.

from torch.utils.data import DataLoader
train_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=32)

In [9]:
#@title pipeline for wandb

import wandb

In [10]:
#@title adjusting the training data, different distributions
import numpy as np
from torch.utils.data import Subset


# Test split of CelebA, loaded from the same local root and with the same
# preprocessing and target type ('attr') as the training split.
test_dataset = CelebA('/content', split='test', target_type='attr', transform=transform, download=False)


In [11]:

import pandas as pd

# Identity id of every training image as a pandas Series; the Series position
# is the row number of the image in `dataset`.
identity_labels = dataset.identity
identity_series = pd.Series(identity_labels.squeeze().numpy())

# The 1000 identities with the most training images, with their image counts.
identity_counts = identity_series.value_counts()
top_1000_identities = identity_counts.nlargest(1000)

# Row numbers (into `dataset`) of every image that belongs to one of them,
# and the corresponding subset of the training data.
top_1000_indices = identity_series.loc[identity_series.isin(top_1000_identities.index)].index
dataset_top_1000 = Subset(dataset, top_1000_indices)

min_samples, max_samples = top_1000_identities.min(), top_1000_identities.max()

print(f"Minimum samples per identity: {min_samples}")
print(f"Maximum samples per identity: {max_samples}")


Minimum samples per identity: 30
Maximum samples per identity: 35


In [12]:
import numpy as np

male_idx = test_dataset.attr_names.index('Male')

# Candidate rows of the test split.
# NOTE(review): top_1000_indices are row numbers of the *training* split; keeping
# the ones that happen to be < len(test_dataset) selects test rows that have no
# relation to the top-1000 identities. The selection itself is left unchanged
# here -- confirm that this is what is wanted.
candidate_test_indices = np.array(
    [int(i) for i in top_1000_indices if i < len(test_dataset)], dtype=int
)

# 'Male' attribute of each candidate row (1 = male, 0 = female).
gender_labels_test_subset = test_dataset.attr[candidate_test_indices, male_idx]
gender_labels_test_subset_np = gender_labels_test_subset.numpy()

# Store real test_dataset row numbers. Using np.where(...)[0] here would only give
# positions inside the candidate list, and Subset(test_dataset, ...) would then
# pick rows whose gender does not match the subset they are filed under.
female_test_subset_indices = candidate_test_indices[gender_labels_test_subset_np == 0]
male_test_subset_indices   = candidate_test_indices[gender_labels_test_subset_np == 1]


print(len(female_test_subset_indices))
print(len(male_test_subset_indices))


# Size of the smaller gender group: the largest balanced test set that can be drawn.
N_test = min(len(female_test_subset_indices), len(male_test_subset_indices))

# Deterministic shuffles so that prefixes of these arrays are reproducible samples.
rng_test = np.random.default_rng(seed=42)
shuffled_female_test_subset_indices = np.copy(female_test_subset_indices)
shuffled_male_test_subset_indices   = np.copy(male_test_subset_indices)
rng_test.shuffle(shuffled_female_test_subset_indices)
rng_test.shuffle(shuffled_male_test_subset_indices)


test_subsets = {}

# Per-proportion test subsets, filled by the subsetting cells below.
test_subsets_f = {}
test_subsets_m = {}
# even split for all examples. we can change this later but we want to be able to generalize... we want there to be the same number of examples for men and women and for these to be in the same set...
# we will put this to the loop.


2477
1577


In [13]:
#@title pipeline for # of classes subsetting
# Builds, for each female proportion p, a training subset of fixed size N_train
# drawn from the top-1000-identity images, plus gender-balanced test subsets.
# Relies on names defined by earlier cells: dataset, test_dataset,
# top_1000_indices, N_test, shuffled_*_test_subset_indices, test_subsets_f/_m.
import numpy as np
from torch.utils.data import Subset


# choose smallest n
# proportions = [0, 0.1, 0.25, 0.5, 0.75, 1.0] # changed this bc it doesn't make sense
# Target share of female images in each training subset.
proportions = [0.25, 0.5, 0.75]
male_idx = test_dataset.attr_names.index('Male')


male_idx_train = dataset.attr_names.index('Male')
gender_labels_train_subset = dataset.attr[top_1000_indices, male_idx_train] # gender from training dataset
# Positions *within top_1000_indices* (not rows of `dataset`) of each gender.
female_train_subset_indices = np.where(gender_labels_train_subset == 0)[0]
male_train_subset_indices   = np.where(gender_labels_train_subset ==  1)[0]

# Size of the smaller gender group; every training subset has exactly this many images.
N_train = min(len(female_train_subset_indices), len(male_train_subset_indices))

# Seeded shuffles: prefixes of the shuffled arrays are reproducible random samples.
# The same generator is reused below, so the order of calls matters for reproducibility.
rng_train = np.random.default_rng(seed=42)
shuffled_female_train_subset_indices = np.copy(female_train_subset_indices)
shuffled_male_train_subset_indices   = np.copy(male_train_subset_indices)
rng_train.shuffle(shuffled_female_train_subset_indices)
rng_train.shuffle(shuffled_male_train_subset_indices)


# training subsets
train_subsets = {}
for p in proportions:
    # p * N_train females, the remainder males.
    num_females_train = int(N_train * p)
    num_males_train = N_train - num_females_train

    # Test size follows the minority share of the training subset.
    q = min(p, 1-p)
    num_females_test = int(N_test * q) # even split for testing
    num_males_test = num_females_test

    chosen_female_train = shuffled_female_train_subset_indices[:num_females_train] if num_females_train > 0 else np.array([], dtype=int)
    chosen_male_train   = shuffled_male_train_subset_indices[:num_males_train]   if num_males_train > 0   else np.array([], dtype=int)

    chosen_female_test = shuffled_female_test_subset_indices[:num_females_test]
    chosen_male_test   = shuffled_male_test_subset_indices[:num_males_test]

    # The chosen_*_train values are positions inside top_1000_indices, so they are
    # mapped through top_1000_indices to obtain row numbers of the full `dataset`.
    original_indices_train = np.concatenate([
        top_1000_indices[chosen_female_train],
        top_1000_indices[chosen_male_train]
    ]).astype(int)
    # Mix the two genders so the subset is not ordered females-then-males.
    rng_train.shuffle(original_indices_train)
    train_subsets[p] = Subset(dataset, original_indices_train)
    # The chosen_*_test arrays are passed to Subset unchanged, i.e. they are used
    # directly as row numbers of test_dataset.
    test_subsets_f[p] = Subset(test_dataset, chosen_female_test)
    test_subsets_m[p] = Subset(test_dataset, chosen_male_test)



# Verification: report the realised female share of each training subset.
for p in proportions:
    # Verification for the training subsets
    indices_train = train_subsets[p].indices
    # Look the genders up in the *full* training dataset via the original row numbers.
    genders_train = dataset.attr[indices_train, male_idx_train]
    percent_female_train = (genders_train == 0).sum()/len(indices_train) if len(indices_train) > 0 else 0
    # NOTE(review): the count printed as "samples" is the number of females, not the subset size.
    print(f"Train Subset (Prop {int(p*100)}%): Target {int(p*100)}% -- Actual {percent_female_train*100:.2f}% females, {(genders_train == 0).sum()} samples")


    number_female_test = len(test_subsets_f[p].indices)
    number_male_test = len(test_subsets_m[p].indices)
    print(f"Number of female test samples: {number_female_test}")
    print(f"Number of male test samples: {number_male_test}")




Train Subset (Prop 25%): Target 25% -- Actual 24.99% females, 2439 samples
Number of female test samples: 394
Number of male test samples: 394
Train Subset (Prop 50%): Target 50% -- Actual 49.99% females, 4879 samples
Number of female test samples: 788
Number of male test samples: 788
Train Subset (Prop 75%): Target 75% -- Actual 75.00% females, 7319 samples
Number of female test samples: 394
Number of male test samples: 394


In [None]:
#@title pipeline for class-based subsetting
# Starting from the gender-balanced training subset train_subsets[0.5], keep for
# every identity a share `prop` of its female images and a share `1 - prop` of
# its male images, and store the result in train_subsets_new[prop].
import numpy as np
from torch.utils.data import Subset

# we know all classes have around 30 examples each
# this could give us a split of 10/30, 20/20, 30/10
# for the test examples we dont care bc all the class indices are going to be there anyways
proportions = [0.25, 0.5, 0.75]

# Column of the 'Male' attribute (1 = male, 0 = female) in dataset.attr.
male_idx_train = dataset.attr_names.index('Male')

# Rows of the full training `dataset` that make up the even-split subset.
indices_train = np.asarray(train_subsets[0.5].indices)
genders_train = dataset.attr[indices_train, male_idx_train]
percent_female_train = (genders_train == 0).sum()/len(indices_train) if len(indices_train) > 0 else 0
print(f"Train Subset (Prop {int(0.5*100)}%): Target {int(0.5*100)}% -- Actual {percent_female_train*100:.2f}% females, {(genders_train == 0).sum()} samples")

# Identity label and gender of every image in the even-split subset, aligned
# position by position with indices_train.
labels = np.array(dataset.identity[indices_train]).squeeze()
gender_labels_top_1000 = dataset.attr[indices_train, male_idx_train].squeeze().numpy()

unique_classes = np.unique(labels)
num_classes = len(unique_classes)
print(num_classes)

# Selected dataset rows per proportion, split by gender.
train_females = {}
train_males = {}

# Cell-local generator, so this cell does not advance the generator of the
# previous subsetting cell.
rng = np.random.default_rng(seed=42)
base_number = 30  # cap on the per-class, per-gender pool the share is applied to

train_subsets_new = {}

for prop in proportions:
    selected_female_indices = []
    selected_male_indices = []

    for c in unique_classes:
        in_class = labels == c
        # Females first, then males, so the sequence of rng calls is stable.
        for gender_value, share, selected in (
            (0, prop, selected_female_indices),
            (1, 1 - prop, selected_male_indices),
        ):
            # Positions inside the even-split subset of this class and gender.
            class_subset_indices = np.where(in_class & (gender_labels_top_1000 == gender_value))[0]
            rng.shuffle(class_subset_indices)
            n_keep = int(np.floor(min(len(class_subset_indices), base_number) * share))
            # Map subset positions back to rows of the full training dataset.
            selected.extend(indices_train[class_subset_indices[:n_keep]])

    train_females[prop] = np.array(selected_female_indices, dtype=int)
    train_males[prop] = np.array(selected_male_indices, dtype=int)

    # Build the subset from the rows selected above. The previous version
    # concatenated chosen_female_train / chosen_male_train left over from the
    # preceding cell, so every proportion received the same stale subset.
    original_indices_train = np.concatenate([train_females[prop], train_males[prop]]).astype(int)
    rng.shuffle(original_indices_train)
    train_subsets_new[prop] = Subset(dataset, original_indices_train)

    print(f"Proportion {prop}:")
    print(f"  Number of female samples: {len(train_females[prop])}")
    print(f"  Number of male samples: {len(train_males[prop])}")


Train Subset (Prop 50%): Target 50% -- Actual 50.00% females, 4961 samples
1000
Proportion 0.25:
  Number of female samples: 983
  Number of male samples: 3562
Proportion 0.5:
  Number of female samples: 2302
  Number of male samples: 2372
Proportion 0.75:
  Number of female samples: 3456
  Number of male samples: 1096


In [None]:
#@title test datasets!!! now since we have all the classes we can just use the regular dataset

# Largest balanced test set: as many images per gender as the smaller group has.
N_test = min(map(len, (female_test_subset_indices, male_test_subset_indices)))

# Take the first N_test entries of each pre-shuffled index array.
chosen_female_test, chosen_male_test = (
    shuffled[:N_test]
    for shuffled in (shuffled_female_test_subset_indices, shuffled_male_test_subset_indices)
)

# Every proportion gets a test subset built from the same indices (a separate
# Subset object per key).
test_subsets_f, test_subsets_m = {}, {}
for p in proportions:
    test_subsets_f[p], test_subsets_m[p] = (
        Subset(test_dataset, chosen_female_test),
        Subset(test_dataset, chosen_male_test),
    )


In [None]:
# creating dataloaders
from torch.utils.data import DataLoader

# Mini-batch size intended for the loaders below.
batch_size = 64

# The loaders are currently disabled.
# NOTE(review): in the cells visible here `test_subsets` is created as an empty
# dict and never filled, so the val_loader line would raise KeyError as written.
# train_loader = DataLoader(train_subsets[0.5], batch_size=batch_size, shuffle=True)
# val_loader = DataLoader(test_subsets[0.5], batch_size=batch_size, shuffle=True)


In [None]:
import torch.nn as nn
import torch.nn.functional as F

## module for resnet-18

In [None]:
#@title putting in the utils here for easier dev
from torch.autograd import Variable
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader


def eval_robust(model, test_loader, pgd_attack, device):
    """Evaluate `model` on adversarial examples generated by `pgd_attack`.

    Args:
        model: classifier returning logits of shape (batch, num_classes).
        test_loader: iterable of (inputs, targets) batches exposing `.dataset`.
        pgd_attack: callable `(inputs, targets) -> adversarial inputs`.
        device: device the batches are moved to.

    Returns:
        (robust_loss, robust_accuracy): mean cross-entropy per sample and
        accuracy in percent, both measured on the adversarial inputs.
    """
    model.eval()
    robust_loss = 0.0
    correct = 0
    num_samples = len(test_loader.dataset)
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            adv = pgd_attack(inputs, targets)
            outputs = model(adv)
            # Sum (not mean) per batch: the total is divided by the number of
            # samples below. Adding batch means and dividing by the sample
            # count would under-report the loss by roughly the batch size.
            robust_loss += F.cross_entropy(outputs, targets, reduction='sum').item()
            pred = outputs.argmax(dim=1, keepdim=True)
            correct += pred.eq(targets.view_as(pred)).sum().item()
    robust_loss /= num_samples
    robust_accuracy = 100. * correct / num_samples

    print('LinfPGD Attack: Average loss: {:.4f}, Robust Accuracy: {}/{} ({:.0f}%)'.format(
        robust_loss, correct, num_samples, robust_accuracy))
    return robust_loss, robust_accuracy



In [None]:
def trades_loss(model,
                x_natural,
                y,
                optimizer,
                step_size=8/2550,
                epsilon=8/255,
                perturb_steps=10,
                beta=1.0):
    '''
    TRADES training loss: cross-entropy on clean inputs plus `beta` times the
    batch-averaged KL divergence between predictions on adversarial and clean inputs.

    Source https://github.com/yaodongyu/TRADES/blob/master/trades.py

    Args:
        model: classifier returning logits.
        x_natural: clean input batch.
        y: class-index targets for `x_natural`.
        optimizer: optimizer whose gradients are zeroed before the loss is built.
        step_size: L-inf step of each attack iteration.
        epsilon: L-inf radius of the allowed perturbation.
        perturb_steps: number of attack iterations.
        beta: weight of the robustness (KL) term.

    Returns:
        Scalar loss tensor; the caller runs backward() and optimizer.step().
        The model is left in train mode.
    '''
    # reduction='sum' is the current spelling of the deprecated size_average=False
    criterion_kl = nn.KLDivLoss(reduction='sum')
    model.eval()
    batch_size = len(x_natural)

    # Start from a slightly perturbed copy, created on the input's own device
    # (a hard-coded .cuda() would fail on CPU-only runtimes).
    x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape, device=x_natural.device)

    # The clean prediction does not change during the attack: compute it once.
    with torch.no_grad():
        p_natural = F.softmax(model(x_natural), dim=1)

    for _ in range(perturb_steps):
        x_adv.requires_grad_()
        with torch.enable_grad():
            loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1), p_natural)
        grad = torch.autograd.grad(loss_kl, [x_adv])[0]
        x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
        # project back onto the epsilon ball around the clean input
        x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
        # NOTE(review): assumes pixel values live in [0, 1]; the notebook's
        # transform normalises images to [-1, 1] -- confirm the intended range.
        x_adv = torch.clamp(x_adv, 0.0, 1.0)

    model.train()

    x_adv = torch.clamp(x_adv, 0.0, 1.0).detach()

    # zero gradient
    optimizer.zero_grad()

    # calculate robust loss (the clean logits are reused for both terms)
    logits = model(x_natural)
    loss_natural = F.cross_entropy(logits, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                                    F.softmax(logits, dim=1))
    loss = loss_natural + beta * loss_robust
    return loss

In [None]:
class LinfPGDAttack(nn.Module):
    """Iterative L-infinity PGD attack against `model`.

    Args:
        model: classifier returning logits.
        epsilon: maximum L-inf distance between the adversarial and clean input.
        steps: number of gradient-sign iterations.
        step_size: L-inf size of each iteration.
    """

    def __init__(self, model, epsilon, steps=10, step_size=0.003):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.steps = steps
        self.step_size = step_size

    def perturb(self, x_natural, y):
        """Return adversarial examples for `x_natural`, detached from autograd.

        `y` is either a 1-D tensor of class indices or a 2-D target tensor whose
        first column holds the class index.
        NOTE(review): the datasets in this notebook are created with
        target_type='attr', so column 0 would be the first attribute rather
        than an identity label -- confirm what the loaders actually yield.
        """
        labels = y[:, 0] if y.dim() > 1 else y
        x_natural = x_natural.detach()
        x_adv = x_natural.clone()

        for _ in range(self.steps):
            x_adv.requires_grad_(True)
            # enable_grad: the attack must work when called under torch.no_grad()
            with torch.enable_grad():
                loss = nn.CrossEntropyLoss()(self.model(x_adv), labels)
                # autograd.grad does not touch parameter .grad buffers, so the
                # model's accumulated gradients are left alone
                grad = torch.autograd.grad(loss, x_adv)[0]

            # gradient-sign ascent step on a detached tensor (no graph build-up)
            x_adv = x_adv.detach() + self.step_size * torch.sign(grad)

            # project the *accumulated* perturbation onto the epsilon ball;
            # clamping only the single step would let the total drift past epsilon
            x_adv = torch.min(torch.max(x_adv, x_natural - self.epsilon), x_natural + self.epsilon)

            # keep pixel values valid
            # NOTE(review): assumes inputs in [0, 1]; the notebook's transform
            # normalises images to [-1, 1] -- confirm the intended range.
            x_adv = torch.clamp(x_adv, 0, 1)

        return x_adv.detach()

    def forward(self, x_natural, y):
        """Alias for `perturb`, so the attack can be called like a module."""
        x_adv = self.perturb(x_natural, y)
        return x_adv

In [None]:
#@title initializing a run

# Authenticate with `wandb.login()` (interactive prompt) or the WANDB_API_KEY
# environment variable before running this cell. Never paste the API key into
# the notebook: the key that used to be written here is exposed in version
# control and should be revoked.

wandb.init(project="face-adv-fairness", name="celeba-balanced-datasets", config={"learning_rate": 0.001, "epochs": 30})

In [None]:
def train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, batch_size, device=None):
    """Train `model` for one epoch and log the loss every 50 batches.

    Args:
        model: classifier returning logits.
        train_loader: iterable of (inputs, targets) batches; `targets` is 2-D
            and its first column is used as the class label.
        mode: 'natural', 'adv_train' (PGD adversarial training) or
            'adv_train_trades' (TRADES loss).
        pgd_attack: callable `(inputs, targets) -> adversarial inputs`; only
            used in 'adv_train' mode.
        optimizer: optimizer updating `model`.
        criterion: loss taking (logits, labels).
        epoch: epoch number, used for logging only.
        batch_size: nominal batch size, used for the progress print-out only.
        device: device the batches are moved to; defaults to the device of the
            model's parameters (CPU if the model has none).

    Raises:
        ValueError: if `mode` is not one of the supported modes.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        # First target column is treated as the class label.
        # NOTE(review): the datasets in this notebook are created with
        # target_type='attr', so column 0 would be the first attribute rather
        # than an identity label -- confirm what the loader yields.
        labels = targets[:, 0]

        if mode == 'natural':
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        elif mode == 'adv_train':  # [Ref] https://arxiv.org/abs/1706.06083
            # craft the adversarial batch in eval mode, then train on it
            model.eval()
            adv_x = pgd_attack(inputs, targets)
            model.train()

            optimizer.zero_grad()
            outputs = model(adv_x)
            loss = criterion(outputs, labels)

        elif mode == 'adv_train_trades':  # [Ref] https://arxiv.org/abs/1901.08573
            optimizer.zero_grad()
            # this branch previously never assigned `loss`
            loss = trades_loss(model=model, x_natural=inputs, y=labels, optimizer=optimizer)

        # elif mode == 'adv_train_mixup': # [Ref] https://arxiv.org/abs/1710.09412
        #     model.eval()
        #     benign_inputs, benign_targets_a, benign_targets_b, benign_lam = mixup_data(inputs, targets)
        #     adv_x = pgd_attack(inputs, targets)
        #     adv_inputs, adv_targets_a, adv_targets_b, adv_lam = mixup_data(adv_x, targets)

        #     model.train()
        #     optimizer.zero_grad()

        #     benign_outputs = model(benign_inputs)
        #     adv_outputs = model(adv_inputs)
        #     loss_1 = mixup_criterion(criterion, benign_outputs, benign_targets_a, benign_targets_b, benign_lam)
        #     loss_2 = mixup_criterion(criterion, adv_outputs, adv_targets_a, adv_targets_b, adv_lam)

        #     loss = (loss_1 + loss_2) / 2

        else:
            print("No training mode specified.")
            raise ValueError(f"Unknown training mode: {mode!r}")

        loss.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(inputs), len(train_loader) * batch_size,
                       100. * (batch_idx + 1) / len(train_loader), loss.item()))

            wandb.log({"training_loss": loss.item(), })

### resnet 18

In [None]:
#@title adjusting this with a simpler model

import torch.nn as nn
import torch.nn.functional as F

class LinfPGDAttack(nn.Module):
    """Iterative L-infinity PGD attack against `model`.

    Args:
        model: classifier returning logits.
        epsilon: maximum L-inf distance between the adversarial and clean input.
        steps: number of gradient-sign iterations.
        step_size: L-inf size of each iteration.
    """

    def __init__(self, model, epsilon, steps=10, step_size=0.003):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.steps = steps
        self.step_size = step_size

    def perturb(self, x_natural, y):
        """
        Repeatedly steps the image along the sign of the gradient of the
        cross-entropy loss with respect to the input, projects the accumulated
        perturbation back into the epsilon ball around `x_natural`, and clamps
        the result so pixel values stay valid.

        `y` is passed to the cross-entropy loss unchanged. The perturbed image
        is returned detached from the autograd graph.
        """
        x_natural = x_natural.detach()
        x_adv = x_natural.clone()

        for _ in range(self.steps):
            x_adv.requires_grad_(True)
            # enable_grad: the attack must work when called under torch.no_grad()
            with torch.enable_grad():
                loss = nn.CrossEntropyLoss()(self.model(x_adv), y)
                # autograd.grad does not touch parameter .grad buffers, so the
                # model's accumulated gradients are left alone
                grad = torch.autograd.grad(loss, x_adv)[0]

            # gradient-sign ascent step on a detached tensor (no graph build-up)
            x_adv = x_adv.detach() + self.step_size * torch.sign(grad)

            # project the *accumulated* perturbation onto the epsilon ball;
            # clamping only the single step would let the total drift past epsilon
            x_adv = torch.min(torch.max(x_adv, x_natural - self.epsilon), x_natural + self.epsilon)

            # keep pixel values valid
            # NOTE(review): assumes inputs in [0, 1]; the notebook's transform
            # normalises images to [-1, 1] -- confirm the intended range.
            x_adv = torch.clamp(x_adv, 0, 1)

        return x_adv.detach()

    def forward(self, x_natural, y):
        """Alias for `perturb`, so the attack can be called like a module."""
        x_adv = self.perturb(x_natural, y)
        return x_adv

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.optim as optim



def eval_test(model, test_loader, device):
    """Evaluate `model` on clean test data.

    Args:
        model: classifier returning logits of shape (batch, num_classes).
        test_loader: iterable of (inputs, targets) batches exposing `.dataset`.
        device: device the batches are moved to.

    Returns:
        (test_loss, test_accuracy): mean cross-entropy per sample and accuracy
        in percent.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    num_samples = len(test_loader.dataset)
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            # Sum (not mean) per batch: the total is divided by the number of
            # samples below. Adding batch means and dividing by the sample
            # count would under-report the loss by roughly the batch size.
            test_loss += F.cross_entropy(outputs, targets, reduction='sum').item()
            pred = outputs.argmax(dim=1, keepdim=True)
            correct += pred.eq(targets.view_as(pred)).sum().item()
    test_loss /= num_samples
    test_accuracy = 100. * correct / num_samples

    print('Test: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, num_samples, test_accuracy))
    return test_loss, test_accuracy


def eval_robust(model, test_loader, pgd_attack, device):
    """Evaluate `model` on adversarial examples generated by `pgd_attack`.

    Args:
        model: classifier returning logits of shape (batch, num_classes).
        test_loader: iterable of (inputs, targets) batches exposing `.dataset`.
        pgd_attack: callable `(inputs, targets) -> adversarial inputs`.
        device: device the batches are moved to.

    Returns:
        (robust_loss, robust_accuracy): mean cross-entropy per sample and
        accuracy in percent, both measured on the adversarial inputs.
    """
    model.eval()
    robust_loss = 0.0
    correct = 0
    num_samples = len(test_loader.dataset)
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            adv = pgd_attack(inputs, targets)
            outputs = model(adv)
            # Sum (not mean) per batch: the total is divided by the number of
            # samples below. Adding batch means and dividing by the sample
            # count would under-report the loss by roughly the batch size.
            robust_loss += F.cross_entropy(outputs, targets, reduction='sum').item()
            pred = outputs.argmax(dim=1, keepdim=True)
            correct += pred.eq(targets.view_as(pred)).sum().item()
    robust_loss /= num_samples
    robust_accuracy = 100. * correct / num_samples

    print('LinfPGD Attack: Average loss: {:.4f}, Robust Accuracy: {}/{} ({:.0f}%)'.format(
        robust_loss, correct, num_samples, robust_accuracy))
    return robust_loss, robust_accuracy


def mixup_data(x, y, mixup_alpha=1.0):
    '''
    Mix every sample of a batch with a randomly chosen partner from the same batch.

    Source https://github.com/facebookresearch/mixup-cifar10/blob/main/train.py

    Returns (mixed_x, y_a, y_b, lam): the blended inputs, the original targets,
    the partners' targets, and the mixing weight drawn from
    Beta(mixup_alpha, mixup_alpha).
    '''
    # draw the mixing weight first, then the pairing permutation
    lam = np.random.beta(mixup_alpha, mixup_alpha)
    perm = torch.randperm(x.size(0))

    partner_x = x[perm, :]
    mixed_x = lam * x + (1 - lam) * partner_x

    return mixed_x, y, y[perm], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    '''
    Loss for mixed-up inputs: the criterion against each of the two target
    sets, weighted by `lam` and `1 - lam` respectively.

    Source https://github.com/facebookresearch/mixup-cifar10/blob/main/train.py
    '''
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b


def trades_loss(model,
                x_natural,
                y,
                optimizer,
                step_size=0.003,
                epsilon=8/255,
                perturb_steps=10,
                beta=1.0):
    '''
    TRADES training loss: cross-entropy on clean inputs plus `beta` times the
    batch-averaged KL divergence between predictions on adversarial and clean inputs.

    Source https://github.com/yaodongyu/TRADES/blob/master/trades.py

    Args:
        model: classifier returning logits.
        x_natural: clean input batch.
        y: class-index targets for `x_natural`.
        optimizer: optimizer whose gradients are zeroed before the loss is built.
        step_size: L-inf step of each attack iteration.
        epsilon: L-inf radius of the allowed perturbation.
        perturb_steps: number of attack iterations.
        beta: weight of the robustness (KL) term.

    Returns:
        Scalar loss tensor; the caller runs backward() and optimizer.step().
        The model is left in train mode.
    '''
    # reduction='sum' is the current spelling of the deprecated size_average=False
    criterion_kl = nn.KLDivLoss(reduction='sum')
    model.eval()
    batch_size = len(x_natural)

    # Start from a slightly perturbed copy, created on the input's own device
    # (a hard-coded .cuda() would fail on CPU-only runtimes).
    x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape, device=x_natural.device)

    # The clean prediction does not change during the attack: compute it once.
    with torch.no_grad():
        p_natural = F.softmax(model(x_natural), dim=1)

    for _ in range(perturb_steps):
        x_adv.requires_grad_()
        with torch.enable_grad():
            loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1), p_natural)
        grad = torch.autograd.grad(loss_kl, [x_adv])[0]
        x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
        # project back onto the epsilon ball around the clean input
        x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
        # NOTE(review): assumes pixel values live in [0, 1]; the notebook's
        # transform normalises images to [-1, 1] -- confirm the intended range.
        x_adv = torch.clamp(x_adv, 0.0, 1.0)

    model.train()

    x_adv = torch.clamp(x_adv, 0.0, 1.0).detach()

    # zero gradient
    optimizer.zero_grad()

    # calculate robust loss (the clean logits are reused for both terms)
    logits = model(x_natural)
    loss_natural = F.cross_entropy(logits, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                                    F.softmax(logits, dim=1))
    loss = loss_natural + beta * loss_robust
    return loss

In [None]:
def train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, batch_size, device=None):
    """Train `model` for one epoch, printing the loss every 50 batches.

    Args:
        model: classifier returning logits.
        train_loader: iterable of (inputs, targets) batches; `targets` is 2-D
            and its first column is used as the class label.
        mode: 'natural', 'adv_train' (PGD adversarial training) or
            'adv_train_trades' (TRADES loss).
        pgd_attack: callable `(inputs, targets) -> adversarial inputs`; only
            used in 'adv_train' mode.
        optimizer: optimizer updating `model`.
        criterion: loss taking (logits, labels).
        epoch: epoch number, used for logging only.
        batch_size: nominal batch size, used for the progress print-out only.
        device: device the batches are moved to; defaults to the device of the
            model's parameters (CPU if the model has none).

    Raises:
        ValueError: if `mode` is not one of the supported modes.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        # First target column is treated as the class label. The losses below
        # must receive `labels`; the multi-column `targets` is not a valid
        # class-index target.
        # NOTE(review): the datasets in this notebook are created with
        # target_type='attr', so column 0 would be the first attribute rather
        # than an identity label -- confirm what the loader yields.
        labels = targets[:, 0]

        if mode == 'natural':
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        elif mode == 'adv_train': # [Ref] https://arxiv.org/abs/1706.06083
            # craft the adversarial batch in eval mode, then train on it
            model.eval()
            adv_x = pgd_attack(inputs, targets)
            model.train()

            optimizer.zero_grad()
            outputs = model(adv_x)
            loss = criterion(outputs, labels)

        elif mode == 'adv_train_trades': # [Ref] https://arxiv.org/abs/1901.08573
            optimizer.zero_grad()
            loss = trades_loss(model=model, x_natural=inputs, y=labels, optimizer=optimizer)

        # elif mode == 'adv_train_mixup': # [Ref] https://arxiv.org/abs/1710.09412
        #     model.eval()
        #     benign_inputs, benign_targets_a, benign_targets_b, benign_lam = mixup_data(inputs, targets)
        #     adv_x = pgd_attack(inputs, targets)
        #     adv_inputs, adv_targets_a, adv_targets_b, adv_lam = mixup_data(adv_x, targets)

        #     model.train()
        #     optimizer.zero_grad()

        #     benign_outputs = model(benign_inputs)
        #     adv_outputs = model(adv_inputs)
        #     loss_1 = mixup_criterion(criterion, benign_outputs, benign_targets_a, benign_targets_b, benign_lam)
        #     loss_2 = mixup_criterion(criterion, adv_outputs, adv_targets_a, adv_targets_b, adv_lam)

        #     loss = (loss_1 + loss_2) / 2

        else:
            print("No training mode specified.")
            raise ValueError(f"Unknown training mode: {mode!r}")

        loss.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(inputs), len(train_loader) * batch_size,
                       100. * (batch_idx + 1) / len(train_loader), loss.item()))



In [None]:
#@title resnet module

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.optim as optim


class BasicBlock(nn.Module):
    """Two 3x3 conv + batch-norm layers with a residual (shortcut) connection.

    The shortcut is the identity unless the block changes the spatial stride or
    the channel count, in which case a 1x1 conv + batch-norm projection is used.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        # main path: the first conv carries the stride, the second keeps the size
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # shortcut path: project only when the shapes would not match
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden = hidden + self.shortcut(x)
        return F.relu(hidden)



class ResNet(nn.Module):
    """ResNet backbone: a 3x3 stem, four residual stages and a linear classifier.

    Args:
        block: residual block class with an `expansion` attribute and the
            constructor signature (in_planes, planes, stride).
        num_blocks: number of blocks in each of the four stages.
        num_classes: size of the output layer.

    The classifier expects 512 * expansion * 4 features, i.e. a 2x2 map after
    the 4x4 average pooling (which is what 64x64 inputs produce).
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_planes = 64

        # stem: keeps the input resolution
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # (planes, stride) of layer1 .. layer4
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for idx, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[idx], stride=stride)
            setattr(self, 'layer{}'.format(idx + 1), stage)

        self.linear = nn.Linear(512 * block.expansion * 4, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block applies `stride`."""
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            blocks.append(block(self.in_planes, planes, block_stride))
            # the next block consumes this block's output channels
            self.in_planes = planes * block.expansion
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return the class logits for a batch of images."""
        out = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            out = stage(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        return self.linear(out)


def ResNet18(num_classes=10):
    """Build a ResNet-18: BasicBlock residual units, two per stage."""
    blocks_per_stage = [2, 2, 2, 2]
    return ResNet(BasicBlock, blocks_per_stage, num_classes)


In [None]:
#@title implementing pgd attacks


class LinfPGDAttack(nn.Module):
    """Untargeted L-infinity projected gradient descent (PGD) attack.

    Each step moves the input along the sign of the cross-entropy gradient,
    projects the accumulated perturbation back into the ``epsilon`` ball
    around the original input, and clamps to the valid value range.

    Args:
        model: Classifier under attack; called as ``model(x)`` and expected
            to return logits.
        epsilon: Maximum L-infinity distance between the adversarial and the
            original input.
        steps: Number of gradient steps.
        step_size: Size of each signed-gradient step.
        clip_min: Lower bound of valid input values (default 0.0).
        clip_max: Upper bound of valid input values (default 1.0).
            NOTE(review): if the inputs were normalised to [-1, 1] (e.g. with
            mean 0.5 / std 0.5), pass ``clip_min=-1.0`` — confirm against the
            dataset transform.
    """

    def __init__(self, model, epsilon, steps=10, step_size=0.003, clip_min=0.0, clip_max=1.0):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.steps = steps
        self.step_size = step_size
        self.clip_min = clip_min
        self.clip_max = clip_max

    def perturb(self, x_natural, y):
        """Return an adversarial version of ``x_natural``.

        Args:
            x_natural: Batch of clean inputs.
            y: Targets. If it has more than one dimension, column 0 is used as
                the class label; a 1-D tensor is used as the labels directly.

        Returns:
            A detached tensor with the same shape as ``x_natural`` that lies
            within ``epsilon`` (L-infinity) of it, up to the final clamp to
            ``[clip_min, clip_max]``.
        """
        labels = y[:, 0] if y.dim() > 1 else y
        x_orig = x_natural.detach()
        x_adv = x_orig.clone()
        criterion = nn.CrossEntropyLoss()

        # torch.autograd.grad does not write parameter .grad fields; this call is
        # kept only so callers see the same cleared-gradient state as before.
        self.model.zero_grad()

        # enable_grad lets the attack run even when the caller is inside torch.no_grad().
        with torch.enable_grad():
            for _ in range(self.steps):
                x_adv.requires_grad_(True)
                loss = criterion(self.model(x_adv), labels)
                grad = torch.autograd.grad(loss, x_adv)[0]

                # Ascent step on a detached copy so the graph does not grow across steps.
                stepped = x_adv.detach() + self.step_size * torch.sign(grad)

                # Project the TOTAL perturbation (not just this step) onto the epsilon ball.
                delta = torch.clamp(stepped - x_orig, -self.epsilon, self.epsilon)

                # Keep values inside the valid input range.
                x_adv = torch.clamp(x_orig + delta, self.clip_min, self.clip_max)

        return x_adv.detach()

    def forward(self, x_natural, y):
        """Alias for :meth:`perturb` so the attack can be called like a module."""
        x_adv = self.perturb(x_natural, y)
        return x_adv

In [None]:
#@title modified train and test functions for celeba

def train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, batch_size):
    """Run one training epoch in the given mode, printing and logging the loss every 50 batches.

    Supported modes:
        'natural'          - train on the clean batch.
        'adv_train'        - train on the output of ``pgd_attack`` (generated with the model in eval mode).
        'adv_train_trades' - use ``trades_loss`` on the clean batch.

    Any other mode prints a message and raises ``ValueError``. ``batch_size`` is
    only used for the progress printout. Relies on the notebook-level ``device``
    and ``wandb``.
    """
    model.train()
    for step_idx, (images, attrs) in enumerate(train_loader):
        images = images.to(device)
        attrs = attrs.to(device)
        # Column 0 of the target tensor is used as the class label (assumed to be the identity label).
        identity = attrs[:, 0]

        use_trades = False
        net_input = None
        if mode == 'natural':
            net_input = images
        elif mode == 'adv_train':  # [Ref] https://arxiv.org/abs/1706.06083
            # Craft adversarial examples in eval mode, then switch back to train mode for the update.
            model.eval()
            net_input = pgd_attack(images, attrs)  # the attack receives the full target tensor
            model.train()
        elif mode == 'adv_train_trades':  # [Ref] https://arxiv.org/abs/1901.08573
            use_trades = True

        # elif mode == 'adv_train_mixup': # [Ref] https://arxiv.org/abs/1710.09412
        #     model.eval()
        #     # Mixup needs 1D targets. You would need to modify mixup_data to work with the extracted labels.
        #     benign_inputs, benign_targets_a, benign_targets_b, benign_lam = mixup_data(inputs, labels)
        #     adv_x = pgd_attack(inputs, targets) # Pass original targets to attack
        #     # This part of mixup with adversarial training might need careful consideration of how targets are handled.
        #     adv_inputs, adv_targets_a, adv_targets_b, adv_lam = mixup_data(adv_x, labels) # Using extracted labels
        #
        #     model.train()
        #     optimizer.zero_grad()
        #
        #     benign_outputs = model(benign_inputs)
        #     adv_outputs = model(adv_inputs)
        #     loss_1 = mixup_criterion(criterion, benign_outputs, benign_targets_a, benign_targets_b, benign_lam)
        #     loss_2 = mixup_criterion(criterion, adv_outputs, adv_targets_a, adv_targets_b, adv_lam)
        #
        #     loss = (loss_1 + loss_2) / 2

        else:
            print("No training mode specified.")
            raise ValueError()

        optimizer.zero_grad()
        if use_trades:
            loss = trades_loss(model=model, x_natural=images, y=identity, optimizer=optimizer)
        else:
            logits = model(net_input)
            loss = criterion(logits, identity)

        loss.backward()
        optimizer.step()

        if step_idx % 50 != 0:
            continue

        # Progress line: samples seen so far vs. a nominal total of len(loader) * batch_size.
        samples_seen = (step_idx + 1) * len(images)
        nominal_total = len(train_loader) * batch_size
        percent_done = 100. * (step_idx + 1) / len(train_loader)
        print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(
            epoch, samples_seen, nominal_total, percent_done, loss.item()))

        wandb.log({f"train_loss {train_loader.dataset}": loss.item()}, step=epoch)


def _evaluate_split(model, loader, pgd_attack, name, epoch):
    """Run clean and robust evaluation on one validation split; return (clean_loss, clean_acc)."""
    if not (loader and len(loader.dataset) > 0):
        # Missing or empty split: report zeros, as the caller did before.
        return 0.0, 0.0
    clean_loss, clean_acc = eval_test_celeba(model, loader, device, name=name)
    # Called for its printing/logging side effects; its return values are not used here.
    eval_robust_celeba(model, loader, pgd_attack, device, name=name, epoch=epoch)
    return clean_loss, clean_acc


def train(model, train_loader, val_loader_f, val_loader_m, pgd_attack,
          mode='natural', epochs=25, batch_size=256, learning_rate=0.001, momentum=0.9, weight_decay=2e-4,
          checkpoint_path='model1.pt'):
    """Train ``model`` for ``epochs`` epochs, validating on the female and male splits after each.

    Args:
        model: Network to train.
        train_loader: Loader of training batches, passed to ``train_ep``.
        val_loader_f, val_loader_m: Validation loaders for the two splits; a
            split that is ``None`` or empty is skipped and reported as zeros.
        pgd_attack: Attack used for adversarial training (by ``train_ep``) and
            for the robust evaluation. The robust evaluation previously read a
            notebook-global ``pgd`` instead of this argument.
        mode: Training mode forwarded to ``train_ep``.
        epochs: Number of epochs.
        batch_size: Forwarded to ``train_ep`` (used there for progress printing).
        learning_rate: Learning rate of the Adam optimizer.
        momentum, weight_decay: Accepted for signature compatibility; not used
            by the Adam optimizer created here.
        checkpoint_path: Where the state dict is saved whenever the average
            validation accuracy improves.

    Relies on the notebook-level ``device`` and ``wandb``.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_acc = 0.0  # best average clean accuracy over the two splits so far

    for epoch in range(epochs):
        train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, batch_size)

        val_loss_f, val_acc_f = _evaluate_split(model, val_loader_f, pgd_attack, 'female', epoch)
        val_loss_m, val_acc_m = _evaluate_split(model, val_loader_m, pgd_attack, 'male', epoch)

        # NOTE(review): a skipped split contributes 0 here, which halves the average.
        val_acc = (val_acc_f + val_acc_m) / 2

        # Save a checkpoint only on strict improvement of the average accuracy.
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        if is_best:
            torch.save(model.state_dict(), checkpoint_path)
        print(f'Average accuracy: {val_acc:.2f}, female: {val_acc_f:.2f}, male: {val_acc_m:.2f}')

        wandb.log({"val_loss_female": val_loss_f, "val_accuracy_female": val_acc_f,
                   "val_loss_male": val_loss_m, "val_accuracy_male": val_acc_m,
                   "average_val_accuracy": val_acc}, step=epoch)





def eval_test_celeba(model, dataloader, device, name):
    """Evaluate clean (unperturbed) loss and accuracy over ``dataloader``.

    Column 0 of each target tensor is used as the class label. ``name`` is
    accepted but not used by the computation.

    Returns:
        ``(average_loss, accuracy_percent)``; both are zero when the loader
        yields no samples.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0

    with torch.no_grad():
        for images, attrs in dataloader:
            images = images.to(device)
            attrs = attrs.to(device)
            identity = attrs[:, 0]  # class label column
            batch_n = images.size(0)

            logits = model(images)
            # cross_entropy returns the batch mean; re-weight by batch size to get a sum.
            loss_sum += F.cross_entropy(logits, identity).item() * batch_n

            _, predicted = logits.max(1, keepdim=True)
            n_correct += predicted.eq(identity.view_as(predicted)).sum().item()
            n_seen += batch_n

    if n_seen > 0:
        return loss_sum / n_seen, 100. * n_correct / n_seen

    # print(f'Test: Average loss: {test_loss:.4f}, Accuracy: {correct}/{total} ({accuracy:.0f}%)')
    # wandb.log(f"clean_test_loss {name}: {test_loss}", step=epoch)
    # wandb.log(f"clean_test_accuracy {name}: {accuracy}", step=epoch)
    return loss_sum / 1, 0



def eval_robust_celeba(model, dataloader, pgd_attack, device, name, epoch):
    """Evaluate ``model`` on adversarial examples produced by ``pgd_attack``.

    Column 0 of each target tensor is used as the class label. Three metrics
    are printed and logged to W&B under ``robust_loss_{name}``,
    ``robust_accuracy_{name}`` and ``attack_success_rate_{name}``.

    Args:
        model: Classifier to evaluate (switched to eval mode).
        dataloader: Iterable of ``(inputs, targets)`` batches.
        pgd_attack: Callable ``(inputs, targets) -> adversarial inputs``. It is
            invoked inside ``torch.no_grad()``, so it must enable gradients
            itself if it needs them.
        device: Device the batches are moved to.
        name: Suffix for the logged metric names.
        epoch: Step value passed to ``wandb.log``.

    Returns:
        ``(robust_loss, robust_accuracy)``: the per-sample average
        cross-entropy on adversarial inputs, and the adversarial accuracy in
        percent. Both are zero when the loader yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    adv_correct = 0
    clean_correct = 0
    success_count = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            labels = targets[:, 0]  # class label column
            batch_n = inputs.size(0)

            # 1-D predictions, so comparisons with `labels` stay element-wise
            # instead of broadcasting (B, 1) against (B,) into a (B, B) matrix.
            pred_clean = model(inputs).argmax(dim=1)

            adv = pgd_attack(inputs, targets)
            outputs_adv = model(adv)
            pred_adv = outputs_adv.argmax(dim=1)

            # cross_entropy returns the batch mean; weight by batch size so the
            # final value is a true per-sample average.
            loss_sum += F.cross_entropy(outputs_adv, labels).item() * batch_n

            clean_hit = pred_clean == labels
            adv_hit = pred_adv == labels
            adv_correct += adv_hit.sum().item()
            clean_correct += clean_hit.sum().item()
            # An attack succeeds when a correctly classified clean sample becomes misclassified.
            success_count += (clean_hit & ~adv_hit).sum().item()
            total += batch_n

    # Fraction of initially-correct samples that the attack flipped.
    attack_success_rate = success_count / clean_correct if clean_correct > 0 else 0
    robust_loss = loss_sum / total if total > 0 else 0.0
    robust_accuracy = 100. * adv_correct / total if total > 0 else 0

    print(f'Attack success rate: {100. * attack_success_rate:.2f}%')
    print(f'LinfPGD Attack: Average loss: {robust_loss:.4f}, Robust Accuracy: {robust_accuracy:.0f}%')

    wandb.log({f"robust_loss_{name}": robust_loss,
               f"robust_accuracy_{name}": robust_accuracy,
               f"attack_success_rate_{name}": attack_success_rate}, step=epoch)
    return robust_loss, robust_accuracy


In [None]:
#@title small sanity check

# Smoke test: run the robust-evaluation path once on the 0.25 female test subset.
# The model is built here and no weights are loaded, so the printed metrics only
# show that the pipeline runs, not how good the model is.
wandb.init(project="face-adv-fairness", name="celeba-sanity-check", config={"learning_rate": 0.001, "epochs": 1})
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = ResNet18(num_classes=1000).to(device) # ResNet for identity classification
val_loader_f = DataLoader(test_subsets_f[0.25], batch_size=64, shuffle=False) # Shuffle usually False for validation
val_loader_m = DataLoader(test_subsets_m[0.25], batch_size=64, shuffle=False) # Shuffle usually False for validation
# 10 steps of 2/255 inside an 8/255 budget.
pgd = LinfPGDAttack(model, epsilon=8/255, step_size = 2/255, steps = 10)

# Only the female loader is evaluated here; val_loader_m is created but not used in this cell.
robust_loss, robust_accuracy = eval_robust_celeba(model, val_loader_f, pgd, device, name = 'female', epoch = 0)

KeyboardInterrupt: 

In [None]:
#@title training run: old
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# Run configuration, defined once so the values logged to W&B match what is actually used.
epsilon = 8/255
training_mode = "adv_train" # Or 'natural' if you want to train naturally
batch_size = 64
learning_rate = 0.001
epochs = 20
# Number of identity classes the classifier predicts.
# Note: this should match the number of unique identities; we filtered initially by the
# top 1000 identities, which might be limiting — it gives very few examples on the test set.
num_identity_classes = 1000

proportions = [0.25, 0.5, 0.75]

for proportion in proportions:
    # One W&B run per proportion.
    wandb.init(project="face-adv-fairness", name=f"celeba-gender-{proportion}",
               config={"learning_rate": learning_rate, "epochs": epochs})

    # Fresh model and attack for every proportion (each proportion is trained separately).
    model = ResNet18(num_classes=num_identity_classes).to(device) # ResNet for identity classification
    pgd = LinfPGDAttack(model, epsilon=epsilon, step_size = epsilon/10, steps = 10)

    train_loader = DataLoader(train_subsets[proportion], batch_size=batch_size, shuffle=True)

    # Validation loaders are only built for proportions that have a non-empty test subset.
    val_loader_f = None
    val_loader_m = None

    if proportion in test_subsets_f and len(test_subsets_f[proportion].indices) > 0:
        val_loader_f = DataLoader(test_subsets_f[proportion], batch_size=batch_size, shuffle=False) # Shuffle usually False for validation
    if proportion in test_subsets_m and len(test_subsets_m[proportion].indices) > 0:
        val_loader_m = DataLoader(test_subsets_m[proportion], batch_size=batch_size, shuffle=False) # Shuffle usually False for validation

    try:
        # train() creates its own criterion/optimizer and saves the best checkpoint.
        # batch_size is forwarded so the progress printout uses the real batch size.
        train(model, train_loader=train_loader, mode=training_mode,
              val_loader_f=val_loader_f, val_loader_m=val_loader_m,
              pgd_attack=pgd, learning_rate=learning_rate, batch_size=batch_size,
              checkpoint_path=f'model_adv_prop{int(proportion*100)}.pt', epochs=epochs) # Save checkpoints with proportion
    finally:
        # Close the run even if training is interrupted, so the next proportion starts a clean run.
        wandb.finish()





cuda




KeyboardInterrupt: 

In [None]:
#@title training run: new, with balanced datasets