<a href="https://colab.research.google.com/github/taweener11/darkSideUnmasked/blob/main/wip_demogpairs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import os

cores = os.cpu_count() # Count the number of cores in a computer
cores

12

In [6]:

#@title shell pipeline for unzipping! this needs to run every time

!unzip -q "/content/drive/My Drive/Datasets/demogpairs/DemogPairs.zip" -d "/content/demogpairs/"

In [7]:
def read_metadata_file(filepath, gender_label, race_label):
    """
    Read a DemogPairs metadata txt file and collect image paths with labels.

    Args:
      filepath (str): path to the metadata txt file
      gender_label (int): 0 for female, 1 for male
      race_label (str): string label for race, e.g. 'black', 'white', 'asian'

    Returns:
      List of tuples: (image_relative_path, gender_label, race_label)
    """
    samples = []
    with open(filepath, 'r') as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            if not line or line.lower().startswith('db_code'):
                continue
            parts = line.split()
            if len(parts) < 2:
                continue
            # parts[1] is the image path relative to DemogPairs folder
            img_path = parts[1]
            samples.append((img_path, gender_label, race_label))
    return samples


In [8]:
import os

# metadata_dir = '/content/drive/My Drive/demogpairs/Metadata'  # adjust path
metadata_dir = '/content/demogpairs/Metadata'  # edited this to run with the local environment



# Map filenames to gender and race labels
metadata_info = {
    'Black_Females.txt': (0, 'black'),
    'Black_Males.txt': (1, 'black'),
    'White_Females.txt': (0, 'white'),
    'White_Males.txt': (1, 'white'),
    'Asian_Females.txt': (0, 'asian'),
    'Asian_Males.txt': (1, 'asian')
}

all_samples = []

for fname, (gender, race) in metadata_info.items():
    full_path = os.path.join(metadata_dir, fname)
    print(f"Reading {full_path} ...")
    samples = read_metadata_file(full_path, gender, race)
    all_samples.extend(samples)

print(f"Total samples loaded: {len(all_samples)}")

Reading /content/demogpairs/Metadata/Black_Females.txt ...
Reading /content/demogpairs/Metadata/Black_Males.txt ...
Reading /content/demogpairs/Metadata/White_Females.txt ...
Reading /content/demogpairs/Metadata/White_Males.txt ...
Reading /content/demogpairs/Metadata/Asian_Females.txt ...
Reading /content/demogpairs/Metadata/Asian_Males.txt ...
Total samples loaded: 10800


In [9]:
import torch
from torchvision import datasets, transforms

In [10]:
# defining a transform that is smaller per suggestion of rasmus

image_size = 64

transform=transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                          std=[0.5, 0.5, 0.5])
])

In [11]:
#@title preliminary analysis on the dataset -- determine the number of examples for subsetting
demogpairs_root = '/content/demogpairs/DemogPairs'

# Build final dataset list with full paths
dataset = []
for rel_path, gender, race in all_samples:
    img_full_path = os.path.join(demogpairs_root, rel_path)
    if os.path.isfile(img_full_path):
        dataset.append((img_full_path, gender, race))
    else:
        print(f"Missing file: {img_full_path}")

print(f"Final dataset size after filtering missing files: {len(dataset)}")

# Extract identity labels from the image paths
# Assuming identity is the directory name right after demogpairs_root
identity_labels_list = []
for img_path, gender, race in dataset:
    # Split the path and get the second to last element (which should be the identity folder)
    parts = img_path.split(os.sep)
    # Find the index of demogpairs_root in the parts
    try:
        root_index = parts.index('DemogPairs')
        # The identity folder is expected to be the element after 'DemogPairs'
        if root_index + 1 < len(parts):
            identity = parts[root_index + 1]
            identity_labels_list.append(identity)
        else:
            # Handle cases where the path doesn't follow the expected structure
            print(f"Warning: Could not extract identity from path: {img_path}")
            identity_labels_list.append("unknown_identity") # Or handle as appropriate
    except ValueError:
        # Handle cases where 'DemogPairs' is not in the path
        print(f"Warning: 'DemogPairs' not found in path: {img_path}")
        identity_labels_list.append("unknown_identity") # Or handle as appropriate


# Convert to a pandas Series for easier counting
import pandas as pd
# Use the created list of identity labels
identity_series = pd.Series(identity_labels_list)

identity_counts = identity_series.value_counts()
# Select top 1000 identities. Ensure there are at least 1000 unique identities.
if len(identity_counts) >= 1000:
    top_1000_identities = identity_counts.nlargest(1000)
else:
    print(f"Warning: Less than 1000 unique identities found. Using all {len(identity_counts)} identities.")
    top_1000_identities = identity_counts

# Get the indices corresponding to the images belonging to the top 1000 identities
# We need the original indices from the `dataset` list
top_1000_identity_names = top_1000_identities.index.tolist()
top_1000_indices = [i for i, (img_path, gender, race) in enumerate(dataset)
                    if img_path.split(os.sep)[-2] in top_1000_identity_names]


# Create a subset of the dataset containing only the top 1000 identities
from torch.utils.data import Subset
# You can create a Subset using the original list and the selected indices
# Note: Subset is typically used with PyTorch Datasets, not plain Python lists.
# If you intend to use this with PyTorch DataLoader later, you might need to
# convert 'dataset' into a custom PyTorch Dataset first.
# For now, let's just have the list of tuples for the top 1000 identities:
dataset_top_1000 = [dataset[i] for i in top_1000_indices]


min_samples = top_1000_identities.min()
max_samples = top_1000_identities.max()

print(f"Minimum samples per identity: {min_samples}")
print(f"Maximum samples per identity: {max_samples}")
print(f"Number of samples in dataset_top_1000: {len(dataset_top_1000)}")

# printing the number of classes per group
for key, value in metadata_info.items():
    gender, race = value
    gender_str = 'male' if gender ==1 else 'female'
    count = len([s for s in dataset if s[1] == gender and s[2] == race])
    print(f'Count of {race} {gender_str} =' + str(count))


Final dataset size after filtering missing files: 10800
Minimum samples per identity: 18
Maximum samples per identity: 18
Number of samples in dataset_top_1000: 10800
Count of black female =1800
Count of black male =1800
Count of white female =1800
Count of white male =1800
Count of asian female =1800
Count of asian male =1800


In [12]:
#@title create a custom PyTorch Dataset for DemogPairs

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import numpy as np

class DemogPairsDataset(Dataset):
    """Custom Dataset for DemogPairs."""

    def __init__(self, samples, transform=None, race_to_int_mapping=None):
        """
        Args:
            samples (list): List of tuples (img_full_path, identity_label, gender_label, race_label).
                           race_label is a string ('black', 'white', 'asian').
            transform (callable, optional): Optional transform to be applied on a sample.
            race_to_int_mapping (dict): Mapping from race string to integer index.
        """
        self.samples = samples
        self.transform = transform
        self.race_to_int_mapping = race_to_int_mapping or {'black': 0, 'white': 1, 'asian': 2} # Default mapping

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, identity_label, gender_label, race_label_str = self.samples[idx]

        # Load image
        img = Image.open(img_path).convert('RGB')

        # Apply transform
        if self.transform:
            img = self.transform(img)

        # Convert race string label to integer using the mapping
        race_label_int = self.race_to_int_mapping.get(race_label_str, -1) # Use -1 for unknown race

        # Return image tensor and a target tensor containing identity, gender, and race
        # Ensure all target components are numerical (identity, gender are already int, race is converted)
        target = torch.tensor([identity_label, gender_label, race_label_int], dtype=torch.long)

        return img, target


demogpairs_root = '/content/demogpairs/DemogPairs'

# Build final dataset list with full paths and extract identities
dataset_with_paths = [] # Store list of (img_full_path, gender, race)
identity_labels_list = [] # Store list of identity folder names

for rel_path, gender, race in all_samples:
    img_full_path = os.path.join(demogpairs_root, rel_path)
    if os.path.isfile(img_full_path):
        dataset_with_paths.append((img_full_path, gender, race))
        # Extract identity label (folder name)
        parts = img_full_path.split(os.sep)
        try:
            root_index = parts.index('DemogPairs')
            if root_index + 1 < len(parts):
                identity = parts[root_index + 1]
                identity_labels_list.append(identity)
            else:
                print(f"Warning: Could not extract identity from path: {img_full_path}")
                identity_labels_list.append("unknown_identity")
        except ValueError:
            print(f"Warning: 'DemogPairs' not found in path: {img_full_path}")
            identity_labels_list.append("unknown_identity")


print(f"Final dataset size after filtering missing files: {len(dataset_with_paths)}")


# Create a mapping from identity folder name to integer ID
unique_identities = sorted(list(set(identity_labels_list))) # Sort for consistent mapping
identity_name_to_int = {name: i for i, name in enumerate(unique_identities)}
num_identity_classes = len(unique_identities)
print(f"Number of unique identity classes: {num_identity_classes}")

# Create a race string to integer mapping for use in the Dataset and in_class
race_to_int = {'black': 0, 'white': 1, 'asian': 2}
int_to_race = {v: k for k, v in race_to_int.items()} # For converting back in in_class


# Rebuild the 'dataset' list to include image path, identity label (int), gender (int), and race (string for now)
# The Dataset's __getitem__ will convert the race string to an int.
dataset = []
for img_full_path, gender, race_str in dataset_with_paths:
    identity_name = img_full_path.split(os.sep)[-2] # Re-extract identity name
    identity_int = identity_name_to_int.get(identity_name, -1) # Get integer ID
    if identity_int != -1: # Only include samples with a valid identity mapping
         dataset.append((img_full_path, identity_int, gender, race_str))
    else:
         print(f"Skipping sample with unknown identity: {img_full_path}")


print(f"Dataset size with identity mapping: {len(dataset)}")



Final dataset size after filtering missing files: 10800
Number of unique identity classes: 600
Dataset size with identity mapping: 10800


In [15]:
#@title subset maker for specified distribution

import numpy as np
import os
from torch.utils.data import Subset
import pandas as pd # Import pandas for value_counts

rng = np.random.default_rng(seed=42)

races = ['black', 'white', 'asian']

# Update the function signature to match the dataset structure (img_full_path, identity_label, gender_label, race_label_str)
def make_train_subsets_from_list(dataset_list, proportions, subgroup = (0, 'asian')):
    """
    Create subsets of the dataset list based on specified proportions
    for a given gender/race subgroup, while maintaining balanced identities.

    Args:
      dataset_list (list): A list of tuples (img_full_path, identity_int, gender_int, race_str)
      proportions (list): A list of proportions (float) for the subgroup in the output subsets.
      subgroup (tuple): (gender_int, race_str) for the subgroup to vary.

    Returns:
      Dict of torch.utils.data.Subset: Subsets of the original list, keyed by proportion.
                                       These Subsets wrap the original dataset_list.
    """
    train_subsets = {}

    # Extract identity labels, gender, and race directly from the input list
    # The input list items are (img_full_path, identity_int, gender_int, race_str)
    dataset_identity_labels_int = [item[1] for item in dataset_list] # identity is at index 1 (int)
    dataset_gender_labels_int = [item[2] for item in dataset_list] # gender is at index 2 (int)
    dataset_race_labels_str = [item[3] for item in dataset_list] # race is at index 3 (str)

    # Calculate base_number based on the minimum samples per identity in this list
    # Use the integer identity labels for value counts
    identity_counts = pd.Series(dataset_identity_labels_int).value_counts()
    base_number = identity_counts.min() if not identity_counts.empty else 0

    # Unique identity integer IDs in this list
    unique_identity_ints = np.unique(dataset_identity_labels_int)


    for prop in proportions:
        selected_original_indices_for_prop = [] # Collect original indices for the current proportion

        # Iterate through the unique integer identity labels
        for identity_int in unique_identity_ints:

            # Indices within the *current dataset_list* that correspond to this identity_int
            # Need to find indices where the identity label (index 1) matches identity_int
            indices_for_identity = [i for i, item in enumerate(dataset_list) if item[1] == identity_int]


            # Separate indices by gender and race *within this identity*
            # Main subgroup indices for this identity
            main_sg_indices_for_identity = [
                idx for idx in indices_for_identity
                if dataset_list[idx][2] == subgroup[0] and dataset_list[idx][3] == subgroup[1] # Check gender (index 2) and race (index 3)
            ]

            rng.shuffle(main_sg_indices_for_identity)

            # Determine number of samples for the main subgroup
            if len(main_sg_indices_for_identity) < base_number:
                 # if an identity has fewer samples than base_number, use its total count as the maximum
                 n_main_sg = int(np.floor(len(main_sg_indices_for_identity) * prop))
            else:
                 # otherwise, use base_number
                n_main_sg = int(np.floor(base_number * prop))


            # Collect the original indices for the selected main subgroup samples
            selected_original_indices_for_prop.extend(main_sg_indices_for_identity[:n_main_sg])

            # selecting for the non-main subgroups *within this identity*
            # Iterate through all possible gender (0, 1) and race (strings) combinations
            for gender in range(2):
                for race_str in races:
                    # Skip the main subgroup
                    if not (gender == subgroup[0] and race_str == subgroup[1]):
                        subgroup_indices_for_identity = [
                            idx for idx in indices_for_identity
                            if dataset_list[idx][2] == gender and dataset_list[idx][3] == race_str # Check gender (index 2) and race (index 3)
                        ]
                        rng.shuffle(subgroup_indices_for_identity)

                        # Calculate how many samples from this subgroup to select
                        # Need to count how many *other* subgroups are present for *this identity*
                        available_other_subgroups_count = 0
                        for g_other in range(2):
                            for r_other in races:
                                # Check if this gender+race combination is present for the current identity
                                if any(dataset_list[idx][2] == g_other and dataset_list[idx][3] == r_other for idx in indices_for_identity):
                                     if not (g_other == subgroup[0] and r_other == subgroup[1]): # Exclude the main subgroup itself
                                        available_other_subgroups_count += 1


                        if available_other_subgroups_count > 0:
                             # Distribute the remaining (1-prop) samples equally among available other subgroups for this identity
                             target_per_other_subgroup = int(np.floor((base_number * (1-prop)) / available_other_subgroups_count))
                        else:
                             target_per_other_subgroup = 0


                        # Number of samples to select for this specific non-main subgroup, up to available count
                        n_subgroup = min(len(subgroup_indices_for_identity), target_per_other_subgroup)


                        # Collect the original indices for the selected non-main subgroup samples
                        selected_original_indices_for_prop.extend(subgroup_indices_for_identity[:n_subgroup])

        # Shuffle the collected original indices for the current proportion
        rng.shuffle(selected_original_indices_for_prop)

        # Create the Subset using the original list and the selected original indices
        train_subsets[prop] = Subset(dataset_list, selected_original_indices_for_prop)

    return train_subsets

# %%
#@title sanity check
from torch.utils.data import DataLoader

# Before creating the subsets, you need a list to create subsets *from*.
# The previous cell created 'dataset' which is a list of (img_full_path, identity_int, gender_int, race_str)
# You also defined train_indices and test_indices.
# Let's create the training list to pass to make_train_subsets_from_list

train_dataset_list = [dataset[i] for i in train_indices] # List of (img_full_path, identity_int, gender_int, race_str) for the training split

# Now call the function with this list
train_subsets = make_train_subsets_from_list(train_dataset_list, [0.25, 0.5, 0.75], subgroup = (0, 'asian')) # subgroup is (gender_int, race_str)

# get the first sample (index 0) from the Subset
# The structure of the items in the subset is still (img_full_path, identity_int, gender_int, race_str)
first_sample = train_subsets[0.25][0]

print("First sample (path, identity_int, gender_int, race_str):")
print(first_sample)

# If you want to see more samples, you can loop
print("\nFirst 3 samples:")
for i in range(min(3, len(train_subsets[0.25]))):
    print(train_subsets[0.25][i])

# To use this with a DataLoader and apply transformations, you need to wrap the Subset
# in your custom DemogPairsDataset. The DemogPairsDataset expects a list of samples.
# A Subset behaves like a list, providing access to elements of the original list.
# So, you can pass a Subset directly to the DemogPairsDataset constructor.

# Re-using the transform defined earlier (ipython-input-6)
# Re-using race_to_int mapping defined in ipython-input-11
# num_identity_classes defined in ipython-input-11

batch_size = 64
proportions = [0.25, 0.5, 0.75]
train_demogpair_datasets = {}
train_demogpair_loaders = {}
for prop in proportions:
    train_demogpair_datasets[prop] = DemogPairsDataset(samples=train_subsets[prop], transform=transform, race_to_int_mapping=race_to_int)
    train_demogpair_loaders[prop] = DataLoader(train_demogpair_datasets[prop], batch_size, shuffle=False)

# Do the same for the test set.
test_dataset_list = [dataset[i] for i in test_indices] # List of (img_full_path, identity_int, gender_int, race_str) for the test split

# Create a DemogPairsDataset for the test list
test_dataset_full = DemogPairsDataset(samples=test_dataset_list, transform=transform, race_to_int_mapping=race_to_int)

# Create a DataLoader for the test dataset
test_loader = DataLoader(test_dataset_full, batch_size, shuffle=False)

first_batch_test = next(iter(test_loader))
print("\nFirst batch from Test DataLoader (transformed image tensor, target tensor):")
# The target tensor from DemogPairsDataset should be [identity_label, gender_label, race_label_int]
print(first_batch_test)

First sample (path, identity_int, gender_int, race_str):
('/content/demogpairs/DemogPairs/lenda_murray/0073_01.jpg', 311, 0, 'black')

First 3 samples:
('/content/demogpairs/DemogPairs/lenda_murray/0073_01.jpg', 311, 0, 'black')
('/content/demogpairs/DemogPairs/shun_oguri/079.jpg', 496, 1, 'asian')
('/content/demogpairs/DemogPairs/josh_cooke/019.jpg', 247, 1, 'white')

First batch from Test DataLoader (transformed image tensor, target tensor):
[tensor([[[[-0.2078, -0.1686,  0.0667,  ..., -0.1922, -0.1765, -0.1529],
          [-0.1765, -0.1529,  0.2000,  ..., -0.2000, -0.1765, -0.1451],
          [-0.1216,  0.0039,  0.2627,  ..., -0.2078, -0.1686, -0.1373],
          ...,
          [ 0.7882,  0.8588,  0.8118,  ..., -0.7098, -0.7412, -0.7804],
          [ 0.9451,  0.9216,  0.8431,  ..., -0.7333, -0.7569, -0.8039],
          [ 0.9686,  0.9216,  0.8667,  ..., -0.7412, -0.7569, -0.7961]],

         [[-0.5451, -0.5843, -0.5373,  ..., -0.5529, -0.5373, -0.5216],
          [-0.5451, -0.5686, -

In [None]:
#@title subset maker from dataset

import numpy as np
import os
from torch.utils.data import Subset
import pandas as pd # Import pandas for value_counts

rng = np.random.default_rng(seed=42)

races = ['black', 'white', 'asian']

def make_train_subsets_from_dataset(dataset, proportions, subgroup = (0, 'asian')):
    """
    Read a DemogPairs metadata txt file and collect image paths with labels.

    Args:
      dataset_list (list): A Demogpairs dataset object
      proportions (list): A list of proportions for the subgroup.
      subgroup (tuple): (gender, race) for the subgroup to vary.

    Returns:
      Dict of torch.utils.data.Subset: Subsets of the original list, keyed by proportion.
    """
    train_subsets = {}

    # Extract identity labels, gender, and race directly from the input list
    dataset_identity_labels = [img_path.split(os.sep)[-2] for img_full_path, _, _ in dataset_list] # Assuming identity is the folder name
    dataset_gender_labels = [gender for _, gender, _ in dataset]
    dataset_race_labels = [race for _, _, race in dataset]

    # Calculate base_number based on the minimum samples per identity in this list
    identity_counts = pd.Series(dataset_identity_labels).value_counts()
    base_number = identity_counts.min() if not identity_counts.empty else 0

    # Map original identity names to a numerical label for easier processing
    unique_identities = np.unique(dataset_identity_labels)
    identity_mapping = {name: i for i, name in enumerate(unique_identities)}
    numerical_identity_labels = np.array([identity_mapping[name] for name in dataset_identity_labels])

    for prop in proportions:
        selected_original_indices_for_prop = [] # Collect original indices for the current proportion

        # the indices 'c' here refer to the numerical identity labels
        for c_num in np.unique(numerical_identity_labels):
            # Get the actual identity name
            identity_name = unique_identities[c_num]

            # Indices within the *current dataset_list* that correspond to identity 'c_num'
            indices_for_identity = np.where(numerical_identity_labels == c_num)[0]

            # Separate indices by gender and race *within this identity*
            main_sg_indices_for_identity = [
                idx for idx in indices_for_identity
                if dataset_gender_labels[idx] == subgroup[0] and dataset_race_labels[idx] == subgroup[1]
            ]

            rng.shuffle(main_sg_indices_for_identity)

            # Determine number of samples for the main subgroup
            if len(main_sg_indices_for_identity) < base_number:
                n_main_sg = int(np.floor(len(main_sg_indices_for_identity) * prop))
            else:
                n_main_sg = int(np.floor(base_number * prop))

            # Collect the original indices for the selected main subgroup samples
            selected_original_indices_for_prop.extend(main_sg_indices_for_identity[:n_main_sg])

            # selecting for the non-main subgroups *within this identity*
            for gender in range(2):
                for race in races:
                    if race != subgroup[1]:
                        subgroup_indices_for_identity = [
                            idx for idx in indices_for_identity
                            if dataset_gender_labels[idx] == gender and dataset_race_labels[idx] == race
                        ]
                        rng.shuffle(subgroup_indices_for_identity)

                        # Calculate how many samples from this subgroup to select
                        available_other_subgroups_count = 0
                        for g_other in range(2):
                            for r_other in races:
                                if r_other != subgroup[1]:
                                     if any(dataset_gender_labels[idx] == g_other and dataset_race_labels[idx] == r_other for idx in indices_for_identity):
                                         available_other_subgroups_count += 1

                        if available_other_subgroups_count > 0:
                            target_per_other_subgroup = int(np.floor((base_number * (1-prop)) / available_other_subgroups_count))
                        else:
                             target_per_other_subgroup = 0

                        # Number of samples to select for this specific non-main subgroup
                        n_subgroup = min(len(subgroup_indices_for_identity), target_per_other_subgroup)

                        # Collect the original indices for the selected non-main subgroup samples
                        selected_original_indices_for_prop.extend(subgroup_indices_for_identity[:n_subgroup])

        # Shuffle the collected original indices for the current proportion
        rng.shuffle(selected_original_indices_for_prop)

        # Create the Subset using the original list and the selected original indices
        train_subsets[prop] = Subset(dataset_list, selected_original_indices_for_prop)

    return train_subsets


In [16]:
#@title pipeline for wandb

import wandb

In [17]:
import torch.nn as nn
import torch.nn.functional as F

## Various utility functions (not in utils yet)

In [18]:
def trades_loss(model,
                x_natural,
                y,
                optimizer,
                step_size=8/2550,
                epsilon=8/255,
                perturb_steps=10,
                beta=1.0):
    '''
    Source https://github.com/yaodongyu/TRADES/blob/master/trades.py
    '''
    # define KL-loss
    criterion_kl = nn.KLDivLoss(size_average=False)
    model.eval()
    batch_size = len(x_natural)

    # generate adversarial example
    x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape).cuda().detach()
    for _ in range(perturb_steps):
        x_adv.requires_grad_()
        with torch.enable_grad():
            loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                   F.softmax(model(x_natural), dim=1))
        grad = torch.autograd.grad(loss_kl, [x_adv])[0]
        x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
        x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
        x_adv = torch.clamp(x_adv, 0.0, 1.0)

    model.train()

    x_adv = Variable(torch.clamp(x_adv, 0.0, 1.0), requires_grad=False)

    # zero gradient
    optimizer.zero_grad()

    # calculate robust loss
    logits = model(x_natural)
    loss_natural = F.cross_entropy(logits, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                                    F.softmax(model(x_natural), dim=1))
    loss = loss_natural + beta * loss_robust
    return loss

In [19]:
class LinfPGDAttack(nn.Module):
    def __init__(self, model, epsilon, steps=10, step_size=0.003):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.steps = steps
        self.step_size = step_size

    def perturb(self, x_natural, y):
        x_adv = x_natural.clone().requires_grad_(True)
        with torch.enable_grad():
            for i in range(self.steps):

                self.model.zero_grad()
                # calculate loss
                output = self.model(x_adv)
                # Selecting the first column of y (assuming it's the identity label)
                loss = nn.CrossEntropyLoss()(output, y[:, 0])

                # gradient
                grad = torch.autograd.grad(loss, x_adv)[0]

                # clipping
                perturbation = torch.clamp(self.step_size * torch.sign(grad), -self.epsilon, self.epsilon)

                # clamping
                x_adv = torch.clamp(x_adv + perturbation, 0, 1)

        return x_adv

    def forward(self, x_natural, y):
        x_adv = self.perturb(x_natural, y)
        return x_adv

In [20]:
#@title initializing a wandb run

# api key: bd1c08839d0c8c49e7c3efe9aabe2d9c644befb6

wandb.init(project="face-adv-fairness", name="demogpairs-demo", config={"learning_rate": 0.001, "epochs": 20})
wandb.finish()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33midilks[0m ([33midilks-dartmouth[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [21]:
#@title utils: pgd-attack

import torch.nn as nn
import torch.nn.functional as F

class LinfPGDAttack(nn.Module):
    def __init__(self, model, epsilon, steps=10, step_size=0.003):
        super().__init__()
        self.model = model
        self.epsilon = epsilon
        self.steps = steps
        self.step_size = step_size

    def perturb(self, x_natural, y):
        """
        Computes the gradient of the cross-entropy loss with respect to the input
        image `x_adv` and updates the image based on the gradient direction. The
        perturbation is clipped to ensure it stays within a specified epsilon range
        and is finally clamped to ensure pixel values are valid.

        The resulting perturbed image is returned.
        """
        x_adv = x_natural.clone().requires_grad_(True)
        with torch.enable_grad():
            for i in range(self.steps):

                self.model.zero_grad()
                # calculate loss
                output = self.model(x_adv)
                loss = nn.CrossEntropyLoss()(output, y)


                # gradient
                grad = torch.autograd.grad(loss, x_adv)[0]


                # clipping
                perturbation = torch.clamp(self.step_size * torch.sign(grad), -self.epsilon, self.epsilon)

                # clamping
                x_adv = torch.clamp(x_adv + perturbation, 0, 1)


        return x_adv

    def forward(self, x_natural, y):
        x_adv = self.perturb(x_natural, y)
        return x_adv

In [22]:
#@title utils: eval_test, eval_robust

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.optim as optim



def eval_test(model, test_loader, device):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            test_loss += F.cross_entropy(outputs, targets).item()
            pred = outputs.max(1, keepdim=True)[1]
            correct += pred.eq(targets.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)

    print('Test: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_accuracy = 100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy


def eval_robust(model, test_loader, pgd_attack, device):
    model.eval()
    robust_loss = 0
    correct = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            adv = pgd_attack(inputs, targets)
            outputs = model(adv)
            robust_loss += F.cross_entropy(outputs, targets).item()
            pred = outputs.max(1, keepdim=True)[1]
            correct += pred.eq(targets.view_as(pred)).sum().item()
    robust_loss /= len(test_loader.dataset)

    print('LinfPGD Attack: Average loss: {:.4f}, Robust Accuracy: {}/{} ({:.0f}%)'.format(
        robust_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    robust_accuracy = 100. * correct / len(test_loader.dataset)
    return robust_loss, robust_accuracy


def mixup_data(x, y, mixup_alpha=1.0):
    '''
    Source https://github.com/facebookresearch/mixup-cifar10/blob/main/train.py
    '''
    lam = np.random.beta(mixup_alpha, mixup_alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]

    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    '''
    Source https://github.com/facebookresearch/mixup-cifar10/blob/main/train.py
    '''
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


def trades_loss(model,
                x_natural,
                y,
                optimizer,
                step_size=0.003,
                epsilon=8/255,
                perturb_steps=10,
                beta=1.0):
    '''
    Source https://github.com/yaodongyu/TRADES/blob/master/trades.py
    '''
    # define KL-loss
    criterion_kl = nn.KLDivLoss(size_average=False)
    model.eval()
    batch_size = len(x_natural)

    # generate adversarial example
    x_adv = x_natural.detach() + 0.001 * torch.randn(x_natural.shape).cuda().detach()
    for _ in range(perturb_steps):
        x_adv.requires_grad_()
        with torch.enable_grad():
            loss_kl = criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                   F.softmax(model(x_natural), dim=1))
        grad = torch.autograd.grad(loss_kl, [x_adv])[0]
        x_adv = x_adv.detach() + step_size * torch.sign(grad.detach())
        x_adv = torch.min(torch.max(x_adv, x_natural - epsilon), x_natural + epsilon)
        x_adv = torch.clamp(x_adv, 0.0, 1.0)

    model.train()

    x_adv = Variable(torch.clamp(x_adv, 0.0, 1.0), requires_grad=False)

    # zero gradient
    optimizer.zero_grad()

    # calculate robust loss
    logits = model(x_natural)
    loss_natural = F.cross_entropy(logits, y)
    loss_robust = (1.0 / batch_size) * criterion_kl(F.log_softmax(model(x_adv), dim=1),
                                                    F.softmax(model(x_natural), dim=1))
    loss = loss_natural + beta * loss_robust
    return loss

In [23]:
def train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, batch_size):
    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        labels = targets[:, 0] # the first column is the identity label

        if mode == 'natural':
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        elif mode == 'adv_train': # [Ref] https://arxiv.org/abs/1706.06083
            model.eval()
            adv_x = pgd_attack(inputs, targets)
            model.train()

            optimizer.zero_grad()
            outputs = model(adv_x)
            loss = criterion(outputs, targets)

        elif mode == 'adv_train_trades': # [Ref] https://arxiv.org/abs/1901.08573
            optimizer.zero_grad()
            loss = trades_loss(model=model, x_natural=inputs, y=targets, optimizer=optimizer)

        # elif mode == 'adv_train_mixup': # [Ref] https://arxiv.org/abs/1710.09412
        #     model.eval()
        #     benign_inputs, benign_targets_a, benign_targets_b, benign_lam = mixup_data(inputs, targets)
        #     adv_x = pgd_attack(inputs, targets)
        #     adv_inputs, adv_targets_a, adv_targets_b, adv_lam = mixup_data(adv_x, targets)

        #     model.train()
        #     optimizer.zero_grad()

        #     benign_outputs = model(benign_inputs)
        #     adv_outputs = model(adv_inputs)
        #     loss_1 = mixup_criterion(criterion, benign_outputs, benign_targets_a, benign_targets_b, benign_lam)
        #     loss_2 = mixup_criterion(criterion, adv_outputs, adv_targets_a, adv_targets_b, adv_lam)

        #     loss = (loss_1 + loss_2) / 2

        else:
            print("No training mode specified.")
            raise ValueError()

        loss.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(inputs), len(train_loader) * batch_size,
                       100. * (batch_idx + 1) / len(train_loader), loss.item()))



In [24]:
#@title resnet module

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import torch.optim as optim


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out



class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion * 4, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet18(num_classes=10):
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes)


In [31]:
#@title in-class statistics (without attack success)

# recall that we have all the statistics in metadata_info.items()


def in_class_clean(predict, label, classes, metadata_info, epoch):
    """
    Calculate accuracy per gender+race group.

    Args:
      predict (torch.Tensor): Tensor of predicted class labels.
      label (torch.Tensor): Tensor of true labels.
                          Assumed to contain identity labels (0th column),
                          gender (1st column), and race (2nd column).
      classes (int): Total number of identity classes.
      metadata_info (dict): Dictionary mapping metadata filenames to (gender, race) tuples.

    Returns:
      Dict: Accuracy per gender+race group, keyed by '{race}_{gender_str}'.
    """
    group_stats = {}

    # Iterate through each gender and race group defined in metadata_info
    for _, (gender, race) in metadata_info.items():
        gender_str = 'male' if gender == 1 else 'female'
        group_key = f'{race}_{gender_str}'
        group_stats[group_key] = {'correct': 0, 'total': 0}

        # Find samples that belong to this specific gender and race group
        # Assuming label tensor has shape (batch_size, 3) -> [identity, gender, race]
        # Filter by gender (index 1) and race (index 2)
        # Note: Assuming race labels in label tensor match the string keys in metadata_info
        is_in_group = (label[:, 1] == gender) & (label[:, 2] == race)

        # Select predictions and labels only for samples in this group
        group_predict = predict[is_in_group]
        group_label = label[is_in_group][:, 0] # Get identity label for this group

        # Calculate correct predictions within this group
        correct_predictions_in_group = (group_predict == group_label)
        num_correct = torch.sum(correct_predictions_in_group).item()
        num_total = group_label.size(0) # Number of samples in this group

        # Store the counts
        group_stats[group_key]['correct'] = num_correct
        group_stats[group_key]['total'] = num_total

    # Calculate accuracy for each group after iterating through all samples
    group_accuracy = {}
    for group_key, stats in group_stats.items():
        accuracy = stats['correct'] / stats['total'] if stats['total'] > 0 else 0.0
        group_accuracy[group_key] = accuracy
        wandb.log({f"accuracy_{group_key}": accuracy}, step=epoch)

    return group_accuracy

In [26]:
#@title in-class statistics

# recall that we have all the statistics in metadata_info.items()


def in_class(predict, label, classes, metadata_info, epoch, pred_clean):
    """
    Calculate accuracy per gender+race group.

    Args:
      predict (torch.Tensor): Tensor of predicted class labels (identity).
      label (torch.Tensor): Tensor of true labels.
                          Assumed to contain identity labels (0th column),
                          gender (1st column), and race (2nd column).
      classes (int): Total number of identity classes (not directly used here, but good for context).
      metadata_info (dict): Dictionary mapping metadata filenames to (gender, race) tuples.

    Returns:
      Dict: Accuracy per gender+race group, keyed by '{race}_{gender_str}'.
            Logs group accuracies to wandb.
    """
    group_stats = {}
    group_attack_success = {}
    group_accuracy = {}


    # Calculate accuracy for each group after iterating through all samples (this should happen after the loop over batches in the eval function)
    # The current structure calculates accuracy per batch and logs it, which might not be what you want.
    # Let's adjust in_class to aggregate stats over all batches and then calculate/log accuracy once.

    # --- Revised in_class to aggregate stats ---
    # The evaluation functions (eval_test_celeba, eval_robust_celeba) should collect
    # all predictions and all labels, then call in_class once.

    # This function will now calculate accuracy given *all* predictions and labels from the epoch
    # It should be called *outside* the batch loop in eval functions.

    # Reinitialize group_stats for aggregation over the entire dataset passed to this function
    group_stats = {}
    unique_groups = set(metadata_info.values())
    for gender, race_str in unique_groups:
        gender_str = 'male' if gender == 1 else 'female'
        group_key = f'{race_str}_{gender_str}'
        group_stats[group_key] = {'correct': 0, 'total': 0}
        group_attack_success[group_key] = 0

        # Iterate through each sample in the aggregated label tensor
        for i in range(label.size(0)):
            sample_identity = label[i, 0].item()
            sample_gender = label[i, 1].item()
            sample_race = label[i, 2] # Assuming this is the string or index

            # keeping track of successful attacks (clean prediction was correct, but adv prediction was wrong)
            mask = pred_clean.view_as(sample_identity) == sample_identity # Reshape pred_clean to match labels
            succesful_attacks = (predict.view_as(sample_identity) != sample_identity) & mask
            success_count += succesful_attacks.sum().item()


            # Check if this sample belongs to the current group
            # Need to handle potential string comparison or index mapping for race
            # Assuming label[:, 2] is a tensor of strings matching 'black', 'white', 'asian'
            # If not, adjust the comparison here.
            try:
                 is_in_group_race = (sample_race == race_str)
            except Exception: # Catch potential errors if sample_race is not a string
                 # Fallback: if sample_race is an integer index, you need a mapping
                 # Example: if race_map_int maps {0: 'black', 1: 'white', 2: 'asian'}
                 # is_in_group_race = (race_map_int.get(sample_race.item()) == race_str)
                 print(f"Warning: Could not compare race label {sample_race} (type {type(sample_race)}) with group race string {race_str}. Assuming race is not directly comparable.")
                 is_in_group_race = False # Assume not in group if cannot compare


            if sample_gender == gender and is_in_group_race:
                 group_stats[group_key]['total'] += 1
                 if predict[i].item() == sample_identity:
                     group_stats[group_key]['correct'] += 1
                     group_attack_success[group_key] += 1


    # Calculate accuracy for each group after aggregating all samples
    group_accuracy = {}
    group_attack_success_rate = {}
    for group_key, stats in group_stats.items():
        accuracy = stats['correct'] / stats['total'] if stats['total'] > 0 else 0.0
        success_rate = group_attack_success[group_key] / stats['total'] if stats['total'] > 0 else 0.0
        group_accuracy[group_key] = accuracy
        # Log group accuracies *per epoch*
        wandb.log({f"accuracy_{group_key}": accuracy}, step=epoch)
        wandb.log({f"attack_success_rate_{group_key}": success_rate}, step=epoch)

    # calculate attack success for each group

    return group_accuracy, group_attack_success_rate




In [37]:
#@title this works but unsure if it is accurate

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np
import wandb
import pandas as pd # Import pandas for value_counts

# recall that we have all the statistics in metadata_info.items()
# metadata_info is globally available as defined earlier.

def in_class(predict_adv, label_full, metadata_info, epoch, predict_clean):
    """
    Calculate accuracy and attack success rate per gender+race group.

    Args:
      predict_adv (torch.Tensor): Tensor of predicted class labels (identity) on adversarial examples.
                                  Shape (N,) where N is the total number of samples.
      label_full (torch.Tensor): Tensor of true labels.
                                 Shape (N, 3) with columns [identity, gender, race_int].
      metadata_info (dict): Dictionary mapping metadata filenames to (gender, race_str) tuples.
                            Used to define groups.
      epoch (int): The current training epoch for logging purposes.
      predict_clean (torch.Tensor): Tensor of predicted class labels (identity) on clean examples.
                                   Shape (N,) where N is the total number of samples.

    Returns:
      Tuple: Dict of robust accuracy per group, Dict of attack success rate per group.
            Logs group metrics to wandb.
    """
    group_stats = {}
    group_attack_success = {}
    group_accuracy = {}
    group_attack_success_rate = {}


    # create a mapping from integer race label (from label_full) back to race string
    # this mapping was created earlier in the notebook: int_to_race
    # int_to_race = {0: 'black', 1: 'white', 2: 'asian'}

    unique_groups = set(metadata_info.values()) # e.g., {(0, 'black'), (1, 'black'), ...}

    for gender, race_str in unique_groups:
        gender_str = 'male' if gender == 1 else 'female'
        group_key = f'{race_str}_{gender_str}'

        # find samples that belong to this specific gender and race group using tensor operations
        # label_full[:, 1] is gender (int), label_full[:, 2] is race (int)
        # need to convert the race_str from metadata_info to an integer using race_to_int
        # assuming race_to_int = {'black': 0, 'white': 1, 'asian': 2}
        try:
            race_int = race_to_int[race_str]
        except KeyError:
             print(f"Warning: Race string '{race_str}' from metadata_info not found in race_to_int mapping. Skipping group {group_key}.")
             continue #skip this group if mapping is missing

        is_in_group = (label_full[:, 1] == gender) & (label_full[:, 2] == race_int)

        # select predictions and labels only for samples in this group
        group_predict_adv = predict_adv[is_in_group]
        group_predict_clean = predict_clean[is_in_group]
        group_label_identity = label_full[is_in_group][:, 0] # Get identity label for this group

        num_total = group_label_identity.size(0) # Number of samples in this group

        if num_total > 0:
            # Calculate correct predictions within this group for adversarial examples (Robust Accuracy)
            correct_predictions_adv = (group_predict_adv == group_label_identity)
            num_correct_adv = torch.sum(correct_predictions_adv).item()
            group_accuracy[group_key] = num_correct_adv / num_total

            # Calculate successful attacks within this group
            # Successful attack: Clean prediction was correct, but adversarial prediction was wrong.
            clean_was_correct = (group_predict_clean == group_label_identity)
            adv_was_wrong = (group_predict_adv != group_label_identity)
            successful_attacks_in_group = clean_was_correct & adv_was_wrong
            num_successful_attacks = torch.sum(successful_attacks_in_group).item()
            group_attack_success_rate[group_key] = num_successful_attacks / num_total

            # Log group metrics to wandb
            print(f"Robust accuracy for {group_key}: {group_accuracy[group_key]}")
            print(f"Attack success rate for {group_key}: {group_attack_success_rate[group_key]}")
            wandb.log({f"robust_accuracy_{group_key}": group_accuracy[group_key]}, step=epoch)
            wandb.log({f"attack_success_rate_{group_key}": group_attack_success_rate[group_key]}, step=epoch)
        else:
            group_accuracy[group_key] = 0.0
            group_attack_success_rate[group_key] = 0.0
            # Log 0 if the group is empty to ensure the metric appears in wandb
              # Log group metrics to wandb
            print(f"Robust accuracy for {group_key}: {group_accuracy[group_key]}")
            print(f"Attack success rate for {group_key}: {group_attack_success_rate[group_key]}")
            wandb.log({f"robust_accuracy_{group_key}": 0.0}, step=epoch)
            wandb.log({f"attack_success_rate_{group_key}": 0.0}, step=epoch)


    return group_accuracy, group_attack_success_rate


def eval_test_demogpairs(model, dataloader, device, name, epoch):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    all_preds = []
    all_labels = [] # To store full targets for group evaluation

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            labels = targets[:, 0] # Extract identity label
            outputs = model(inputs)
            test_loss += F.cross_entropy(outputs, labels, reduction='sum').item() # Use reduction='sum' to sum loss over the batch
            pred = outputs.max(1, keepdim=True)[1].squeeze() # Squeeze to get shape (batch_size,)
            correct += pred.eq(labels.view_as(pred)).sum().item()
            total += inputs.size(0)

            all_preds.append(pred.cpu())
            all_labels.append(targets.cpu()) # Append full targets for group evaluation

    test_loss /= total if total > 0 else 1
    accuracy = 100. * correct / total if total > 0 else 0

    print(f'Test ({name}): Average loss: {test_loss:.4f}, Accuracy: {correct}/{total} ({accuracy:.0f}%)')

    # Calculate group accuracies
    # Concatenate all batches' predictions and labels
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    # Pass full labels (with gender and race) to in_class_clean
    # Note: in_class_clean does not use predict_clean, so we don't pass it.
    group_accuracy_clean = in_class_clean(predict=all_preds, label=all_labels, classes=model.linear.out_features, metadata_info=metadata_info, epoch=epoch)

    wandb.log({f"clean_test_loss {name}": test_loss}, step=epoch)
    wandb.log({f"clean_test_accuracy {name}": accuracy}, step=epoch)

    # Log group accuracies to wandb within in_class_clean function
    # Returning only overall loss and accuracy here as in_class_clean logs group stats
    return test_loss, accuracy

def eval_robust_demogpairs(model, dataloader, pgd_attack, device, name, epoch):
    model.eval()
    robust_loss = 0
    correct = 0
    total = 0

    # Need to collect both clean and adversarial predictions for the whole dataset
    all_preds_clean = []
    all_preds_adv = []
    all_labels_full = [] # To store full targets for group evaluation

    with torch.no_grad(): # Keep this outside the loop if pgd_attack generates adv examples without tracking gradients
        for batch_idx, (inputs, targets) in enumerate(dataloader):
            inputs, targets = inputs.to(device), targets.to(device)
            labels = targets[:, 0] # extract identity label (int)

            # Get clean predictions
            outputs_clean = model(inputs)
            pred_clean = outputs_clean.max(1, keepdim=True)[1].squeeze() # Squeeze to get shape (batch_size,)
            all_preds_clean.append(pred_clean.cpu()) # Collect clean predictions


            # Generate adversarial examples
            # Need to enable gradients temporarily for the attack
            with torch.enable_grad():
                 adv = pgd_attack(inputs, labels) # Pass identity labels for attack loss

            # Evaluate on adversarial examples
            outputs_adv = model(adv)
            # Use reduction='sum' to sum loss over the batch
            robust_loss += F.cross_entropy(outputs_adv, labels, reduction='sum').item()
            pred_adv = outputs_adv.max(1, keepdim=True)[1].squeeze() # Squeeze to get shape (batch_size,)
            correct += pred_adv.eq(labels.view_as(pred_adv)).sum().item()
            total += inputs.size(0)

            all_preds_adv.append(pred_adv.cpu())
            all_labels_full.append(targets.cpu()) # Store full targets

    robust_loss /= total if total > 0 else 1
    robust_accuracy = 100. * correct / total if total > 0 else 0

    print(f'Robust Test ({name}): Average loss: {robust_loss:.4f}, Robust Accuracy: {correct}/{total} ({robust_accuracy:.0f}%)')

    # Calculate group robust accuracies and attack success rates
    all_preds_clean = torch.cat(all_preds_clean)
    all_preds_adv = torch.cat(all_preds_adv)
    all_labels_full = torch.cat(all_labels_full)

    # Pass all aggregated data to in_class
    group_accuracy_robust, group_attack_success_rate = in_class(
        predict_adv=all_preds_adv,
        label_full=all_labels_full,
        metadata_info=metadata_info,
        epoch=epoch,
        predict_clean=all_preds_clean # Pass clean predictions
    )

    wandb.log({f"robust_loss_{name}": robust_loss}, step=epoch)
    wandb.log({f"robust_accuracy_{name}": robust_accuracy}, step=epoch)
    # Overall attack success rate needs clean predictions too
    # Overall attack success rate: Number of samples where clean pred was right, but adv pred was wrong / total samples where clean pred was right
    total_clean_correct = torch.sum(all_preds_clean == all_labels_full[:, 0]).item()
    total_successful_attacks = torch.sum((all_preds_clean == all_labels_full[:, 0]) & (all_preds_adv != all_labels_full[:, 0])).item()
    overall_attack_success_rate = 100. * total_successful_attacks / total_clean_correct if total_clean_correct > 0 else 0.0

    print(f'Overall Attack success rate ({name}): {overall_attack_success_rate:.2f}%')
    wandb.log({f"overall_attack_success_rate_{name}": overall_attack_success_rate}, step=epoch)


    # Log robust group accuracies and attack success rates within in_class function
    # Returning only overall loss and accuracy here as in_class logs group stats
    return robust_loss, robust_accuracy


def train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, batch_size):
    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        # Extract the identity label from the multi-dimensional target tensor
        labels = targets[:, 0] # Assuming the first column is the identity label


        if mode == 'natural':
            optimizer.zero_grad()
            outputs = model(inputs)
            # Use the extracted identity labels as the target for CrossEntropyLoss
            loss = criterion(outputs, labels)

        elif mode == 'adv_train': # [Ref] https://arxiv.org/abs/1706.06083
            model.eval()
            # Pass the original multi-dimensional targets to the attack
            # The LinfPGDAttack perturbs based on identity label loss, so it only needs identity label for loss calculation.
            # Let's update LinfPGDAttack to expect only identity labels as target for loss calculation
            # Ensure pgd_attack expects identity labels
            adv_x = pgd_attack(inputs, labels) # Pass only identity labels to the attack
            model.train()

            optimizer.zero_grad()
            outputs = model(adv_x)
            # Use the extracted identity labels as the target for CrossEntropyLoss
            loss = criterion(outputs, labels)

        elif mode == 'adv_train_trades': # [Ref] https://arxiv.org/abs/1901.08573
            optimizer.zero_grad()
            # TRADES loss function already handles internally evaluation mode for adversarial example generation
            # and expects only identity labels for the inner loss calculation (KL divergence).
            # It takes the natural targets `y` for the natural loss and KL divergence target.
            # Ensure trades_loss function expects identity labels for `y`
            loss = trades_loss(model=model, x_natural=inputs, y=labels, optimizer=optimizer, step_size=pgd_attack.step_size, epsilon=pgd_attack.epsilon, perturb_steps=pgd_attack.steps)


        else:
            print("No training mode specified.")
            raise ValueError()

        loss.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print('Train Epoch: {} [{:05d}/{} ({:.0f}%)]\t Loss: {:.6f}'.format(
                epoch, (batch_idx + 1) * len(inputs), len(train_loader.dataset), # Use len(train_loader.dataset) for total samples
                       100. * (batch_idx + 1) / len(train_loader), loss.item()))

            # Ensure train_loader.dataset can be converted to string for logging or use a predefined name
            try:
                # Try to get a meaningful name if the dataset is a Subset, otherwise use a default
                if isinstance(train_loader.dataset, Subset):
                    # Attempt to identify the underlying dataset or its source
                    dataset_name = "train_subset" # Default for Subset
                    # More specific name might require inspection of the Subset's dataset attribute
                    # e.g., if the underlying dataset has a name or identifier
                else:
                   dataset_name = type(train_loader.dataset).__name__ # Use class name
            except Exception:
                dataset_name = "train_dataset" # Fallback name
            wandb.log({f"train_loss {dataset_name}": loss.item()}, step=epoch)


def train(model, train_loader, val_loader, pgd_attack,
          mode='natural', epochs=25, batch_size=256, learning_rate=0.001, momentum=0.9, weight_decay=2e-4,
          checkpoint_path='model1.pt'):

    # criterion should be CrossEntropyLoss as we are predicting identity
    criterion = nn.CrossEntropyLoss()
    # Use Adam optimizer as specified in the original notebook block
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Assuming device is globally available ('cuda' or 'cpu')


    best_acc = 0.0 # Track best clean validation accuracy to save model

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        # training
        train_ep(model, train_loader, mode, pgd_attack, optimizer, criterion, epoch, train_loader.batch_size) # Pass actual batch_size from loader

        # Evaluate on validation set
        # We need to evaluate both clean and robustly on the same validation set
        # Assuming test_loader is used as the validation loader in the sanity check block
        # In a real training loop, you would likely have a separate validation_loader

        # Evaluate robustly first to get robust metrics
        # Pass test_loader as the validation loader here for consistency with the sanity check
        robust_loss, robust_accuracy = eval_robust_demogpairs(model, val_loader, pgd_attack, device, name='robust-validation', epoch=epoch)
        # Evaluate cleanly second
        val_loss, val_acc = eval_test_demogpairs(model, val_loader, device, name='clean-validation', epoch=epoch)


        # remember best acc and save checkpoint based on clean validation accuracy
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)

        # save checkpoint if is a new best
        if is_best:
            print(f"Saving best model with clean validation accuracy: {best_acc:.2f}%")
            torch.save(model.state_dict(), checkpoint_path)
        else:
             print(f"Validation accuracy did not improve. Best clean accuracy so far: {best_acc:.2f}%")


    print("\nTraining finished.")
    print(f"Best clean validation accuracy: {best_acc:.2f}%")
    # Optionally load the best model state dict at the end
    # model.load_state_dict(torch.load(checkpoint_path))
    # print("Loaded best model checkpoint.")

In [38]:


#@title small sanity check

wandb.init(project="face-adv-fairness", name="demogpairs-sanity-check-dataloaders", config={"learning_rate": 0.001, "epochs": 1})
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Fix: Create val_loader directly from test_dataset, not from test_loader
# Also, model should be initialized with the correct number of classes (number of unique identities)
# Assuming 1000 unique identities based on previous code.
num_identity_classes = 1000
model = ResNet18(num_classes=num_identity_classes).to(device)

# Create DataLoader directly from the test_dataset list
# val_loader = DataLoader(test_dataset, batch_size=64, shuffle=False) # Shuffle usually False for validation

# label_debug(model, val_loader, device)

# # Ensure pgd attack is initialized with the model that has the correct output dimension
pgd = LinfPGDAttack(model, epsilon=8/255, step_size = 2/255, steps = 10)

# # Call eval_robust_celeba
# eval_test_demogpairs(model, test_loader, device, name = 'test_set_clean', epoch = 0)

eval_robust_demogpairs(model, test_loader, pgd, device, name='validation', epoch=0)

0,1
attack_success_rate_asian_female,▁
attack_success_rate_asian_male,▁
attack_success_rate_black_female,▁
attack_success_rate_black_male,▁
attack_success_rate_white_female,▁
attack_success_rate_white_male,▁
overall_attack_success_rate_validation,▁
robust_accuracy_asian_female,▁
robust_accuracy_asian_male,▁
robust_accuracy_black_female,▁

0,1
attack_success_rate_asian_female,0.0
attack_success_rate_asian_male,0.0
attack_success_rate_black_female,0.01389
attack_success_rate_black_male,0.0
attack_success_rate_white_female,0.0
attack_success_rate_white_male,0.0
overall_attack_success_rate_validation,100.0
robust_accuracy_asian_female,0.0
robust_accuracy_asian_male,0.0
robust_accuracy_black_female,0.0


Robust Test (validation): Average loss: 6.9175, Robust Accuracy: 0/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.0
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.0
Attack success rate for asian_female: 0.0
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (validation): 0.00%


(6.917504261158131, 0.0)

In [None]:
#@title convenience funtion to log predictions for a batch of test images
def log_test_predictions(images, labels, outputs, predicted, test_table, log_counter):
  # obtain confidence scores for all classes
  scores = F.softmax(outputs.data, dim=1)
  log_scores = scores.cpu().numpy()
  log_images = images.cpu().numpy()
  log_labels = labels.cpu().numpy()
  log_preds = predicted.cpu().numpy()
  # adding ids based on the order of the images
  _id = 0
  for i, l, p, s in zip(log_images, log_labels, log_preds, log_scores):
    # Transpose image dimensions from (C, H, W) to (H, W, C) for wandb.Image
    i_transposed = np.transpose(i, (1, 2, 0))

    # add required info to data table:
    # id, image pixels, model's guess, true label, scores for all classes
    img_id = str(_id) + "_" + str(log_counter)
    # Use the transposed image data
    test_table.add_data(img_id, wandb.Image(i_transposed), p, l, *s)
    _id += 1
    if _id == batch_size:
      break

In [41]:
#@title training run: new, with balanced datasets

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
epsilon = 8/255
training_mode = "adv_train" # Or 'natural' if you want to train naturally
# or trades which was the method used in the original paper
batch_size = 64

proportions = [0.25, 0.5, 0.75]

for proportion in proportions:
    # Re-initialize model and attack for each proportion if needed, otherwise move outside loop
    # If training separately for each proportion, re-initialization is correct.
    model = ResNet18(num_classes=999).to(device) # ResNet for identity classification
    # Note: number of classes (1000) should match the number of unique identities
    # it gives very few examples on the test set


    # make a new run for each example
    wandb.init(project="face-adv-fairness", name=f"demogpairs-asian-women-{proportion}", config={"learning_rate": 0.001, "epochs": 30})


    num_identity_classes = 999 # Assuming the ResNet18 model is configured for 1000 classes
    model = ResNet18(num_classes=num_identity_classes).to(device)

    pgd = LinfPGDAttack(model, epsilon=epsilon, step_size = epsilon/10, steps = 10)

    # train function definition already includes criterion and optimizer definition.
    # Move best_acc outside the inner epoch loop within the train function.
    # The train function saves checkpoint, so best_acc is managed internally.

    # maybe create these on the go?
    train_loader = train_demogpair_loaders[prop]

    val_loader = test_loader


    # call the modified train function
    train(model, train_loader=train_loader, mode=training_mode,
          val_loader=val_loader,
          pgd_attack=pgd, learning_rate=0.001,
          checkpoint_path=f'model_adv_prop{int(proportion*100)}.pt', epochs=20) # Save checkpoints with proportion




cuda



Epoch 1/20
Robust Test (robust-validation): Average loss: 6.7663, Robust Accuracy: 3/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.0
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.008333333333333333
Attack success rate for asian_female: 0.005555555555555556
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (robust-validation): 100.00%
Test (clean-validation): Average loss: 66.9977, Accuracy: 2/2160 (0%)
Saving best model with clean validation accuracy: 0.09%

Epoch 2/20
Robust Test (robust-validation): Average loss: 7.8329, Robust Accuracy: 4/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.002777777777777778
Robust accuracy for asian_

0,1
accuracy_asian_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_asian_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
attack_success_rate_asian_female,▄▂▄▂▅▁▂▄▂▅▄▅▇▇▄█▇▄▇▇
attack_success_rate_asian_male,▁▁▁▃▃▁▁█▅▁▃▁▁▅▁▁▅▃▆▃
attack_success_rate_black_female,▁▁▁▁▁▄▂▂▄▂▄▄▄▂▂▂█▂▇▄
attack_success_rate_black_male,▁▁▁▂▂▂▂▄▅▅▂▁▄▄▄▂▁▂▂█

0,1
accuracy_asian_female,0.0
accuracy_asian_male,0.0
accuracy_black_female,0.0
accuracy_black_male,0.0
accuracy_white_female,0.0
accuracy_white_male,0.0
attack_success_rate_asian_female,0.01111
attack_success_rate_asian_male,0.00278
attack_success_rate_black_female,0.00556
attack_success_rate_black_male,0.01389



Epoch 1/20
Robust Test (robust-validation): Average loss: 6.7645, Robust Accuracy: 2/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.0
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.005555555555555556
Attack success rate for asian_female: 0.005555555555555556
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (robust-validation): 100.00%
Test (clean-validation): Average loss: 15.7596, Accuracy: 2/2160 (0%)
Saving best model with clean validation accuracy: 0.09%

Epoch 2/20
Robust Test (robust-validation): Average loss: 6.7775, Robust Accuracy: 3/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack 

0,1
accuracy_asian_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_asian_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
attack_success_rate_asian_female,▂▁▃▁▁▄▃▁▄▂▆▅█▃▃▂▁▃▁▁
attack_success_rate_asian_male,▁▃▁▁▁▃▁▁▃▁▁▁▁▁▁▁▃▃██
attack_success_rate_black_female,▁█▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁█▁
attack_success_rate_black_male,▁▁▅▁▁▁▁▁▁▅▁▁▁▁▅▁▁██▅

0,1
accuracy_asian_female,0.0
accuracy_asian_male,0.0
accuracy_black_female,0.0
accuracy_black_male,0.0
accuracy_white_female,0.0
accuracy_white_male,0.0
attack_success_rate_asian_female,0.00278
attack_success_rate_asian_male,0.00833
attack_success_rate_black_female,0.0
attack_success_rate_black_male,0.00278



Epoch 1/20
Robust Test (robust-validation): Average loss: 7.1556, Robust Accuracy: 3/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.0
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.008333333333333333
Attack success rate for asian_female: 0.005555555555555556
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (robust-validation): 66.67%
Test (clean-validation): Average loss: 51.3893, Accuracy: 3/2160 (0%)
Saving best model with clean validation accuracy: 0.14%

Epoch 2/20
Robust Test (robust-validation): Average loss: 7.9589, Robust Accuracy: 1/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.002777777777777778
Robust accuracy for asian_m

In [None]:
#@title training run: new, with balanced datasets

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
epsilon = 8/255
training_mode = "adv_train" # Or 'natural' if you want to train naturally
# or trades which was the method used in the original paper
batch_size = 64

proportions = [0.25, 0.5, 0.75]

for proportion in proportions:
    # Re-initialize model and attack for each proportion if needed, otherwise move outside loop
    # If training separately for each proportion, re-initialization is correct.
    model = ResNet18(num_classes=999).to(device) # ResNet for identity classification
    # Note: number of classes (1000) should match the number of unique identities
    # it gives very few examples on the test set


    # make a new run for each example
    wandb.init(project="face-adv-fairness", name=f"demogpairs-asian-women-{proportion}", config={"learning_rate": 0.001, "epochs": 50, 'steps':5})


    num_identity_classes = 999 # Assuming the ResNet18 model is configured for 1000 classes
    model = ResNet18(num_classes=num_identity_classes).to(device)

    pgd = LinfPGDAttack(model, epsilon=epsilon, step_size = epsilon/5, steps = 5)

    # train function definition already includes criterion and optimizer definition.
    # Move best_acc outside the inner epoch loop within the train function.
    # The train function saves checkpoint, so best_acc is managed internally.

    # maybe create these on the go?
    train_loader = train_demogpair_loaders[prop]

    val_loader = test_loader


    # call the modified train function
    train(model, train_loader=train_loader, mode=training_mode,
          val_loader=val_loader,
          pgd_attack=pgd, learning_rate=0.001,
          checkpoint_path=f'model_adv_prop{int(proportion*100)}.pt', epochs=50) # Save checkpoints with proportion




cuda


0,1
accuracy_asian_female,▁▁▁▁
accuracy_asian_male,▁▁▁▁
accuracy_black_female,▁▁▁▁
accuracy_black_male,▁▁▁▁
accuracy_white_female,▁▁▁▁
accuracy_white_male,▁▁▁▁
attack_success_rate_asian_female,▃▁▃█
attack_success_rate_asian_male,▁█▁▁
attack_success_rate_black_female,▁▁▁▁
attack_success_rate_black_male,▁█▁▁

0,1
accuracy_asian_female,0.0
accuracy_asian_male,0.0
accuracy_black_female,0.0
accuracy_black_male,0.0
accuracy_white_female,0.0
accuracy_white_male,0.0
attack_success_rate_asian_female,0.01389
attack_success_rate_asian_male,0.0
attack_success_rate_black_female,0.0
attack_success_rate_black_male,0.0



Epoch 1/50
Robust Test (robust-validation): Average loss: 6.7901, Robust Accuracy: 3/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.0
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.008333333333333333
Attack success rate for asian_female: 0.011111111111111112
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (robust-validation): 100.00%
Test (clean-validation): Average loss: 14.5563, Accuracy: 4/2160 (0%)
Saving best model with clean validation accuracy: 0.19%

Epoch 2/50
Robust Test (robust-validation): Average loss: 6.6014, Robust Accuracy: 4/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack 

0,1
accuracy_asian_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_asian_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
attack_success_rate_asian_female,▆▂▂▁▂█▅▅▅▇▆▂▂▂▂▃▃▃▆▅▆█▇▆▁▆▃▅▆▅▅▅▆▅▃▃▅▅▅▆
attack_success_rate_asian_male,▁▁▁▁▅▅▁▅▁▁█▅█▁▅▅▅▅▅▁▅▅▁▅██▁▅▁▁▅▁▁▁▅▅▁▅▅▅
attack_success_rate_black_female,▁▁▁▁▁▁▁▃▃▁▃▃▁▁▆▃▃▁▁▁▆▁▁▁▁▃▃▃▆▃▆▆▆▆▆▆▆█▆▆
attack_success_rate_black_male,▁▁▅▁▅▁▁▁▁▅▁▁▅▅▁▁▅▅▁▁▁▅▁▁▁▁█▁▁▅▁▁▁▁▁▁▁▁▁▁

0,1
accuracy_asian_female,0.0
accuracy_asian_male,0.0
accuracy_black_female,0.0
accuracy_black_male,0.0
accuracy_white_female,0.0
accuracy_white_male,0.0
attack_success_rate_asian_female,0.01111
attack_success_rate_asian_male,0.00278
attack_success_rate_black_female,0.00556
attack_success_rate_black_male,0.0



Epoch 1/50
Robust Test (robust-validation): Average loss: 6.6758, Robust Accuracy: 4/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.002777777777777778
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.011111111111111112
Attack success rate for asian_female: 0.008333333333333333
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (robust-validation): 100.00%
Test (clean-validation): Average loss: 162.9593, Accuracy: 4/2160 (0%)
Saving best model with clean validation accuracy: 0.19%

Epoch 2/50
Robust Test (robust-validation): Average loss: 6.5222, Robust Accuracy: 3/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian

0,1
accuracy_asian_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_asian_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_black_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
accuracy_white_male,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
attack_success_rate_asian_female,▅▂▂▂▃▃▁▁▁▁▆▃▆▅█▆▂▃▂▅▅▁▃▆▆█▆▇▃▆▃▆▃▇▇▆▆▅▇▇
attack_success_rate_asian_male,▃▁▅▃▁▁▁▁▁▁▁▁▁▁▃▅▃▃█▃▃▃▃▁▁▅▅▃▃▅▅▃▅▃▃▅▃▆▅▅
attack_success_rate_black_female,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▅▅▁▁▁▅▅▅▅▅▅▁▁▅▁█▅▅▅▅▅▅█
attack_success_rate_black_male,▁▁▁▃▁▁▁▁▁▁▁▁▃▃▁▁▁▅▃▁▁▃█▆▅▃▁▆▃▁▃▅▃▃▃▃▁▁▃▁

0,1
accuracy_asian_female,0.0
accuracy_asian_male,0.0
accuracy_black_female,0.0
accuracy_black_male,0.0
accuracy_white_female,0.0
accuracy_white_male,0.0
attack_success_rate_asian_female,0.01389
attack_success_rate_asian_male,0.00556
attack_success_rate_black_female,0.00556
attack_success_rate_black_male,0.0



Epoch 1/50
Robust Test (robust-validation): Average loss: 17.6768, Robust Accuracy: 1/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack success rate for asian_male: 0.0
Robust accuracy for white_male: 0.0
Attack success rate for white_male: 0.0
Robust accuracy for black_female: 0.0
Attack success rate for black_female: 0.0
Robust accuracy for asian_female: 0.002777777777777778
Attack success rate for asian_female: 0.0
Robust accuracy for black_male: 0.0
Attack success rate for black_male: 0.0
Overall Attack success rate (robust-validation): 0.00%
Test (clean-validation): Average loss: 601.0899, Accuracy: 0/2160 (0%)
Validation accuracy did not improve. Best clean accuracy so far: 0.00%

Epoch 2/50
Robust Test (robust-validation): Average loss: 6.5770, Robust Accuracy: 5/2160 (0%)
Robust accuracy for white_female: 0.0
Attack success rate for white_female: 0.0
Robust accuracy for asian_male: 0.0
Attack su