In [1]:
# Install necessary packages if not already installed
# !pip install torch torchvision opacus wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
import os
import random
from tqdm import tqdm

# Set device (GPU if available)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Set random seeds for reproducibility
seed = 42
torch.manual_seed(seed)
if device.type == 'cuda':
    torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)


Using device: cuda


In [2]:
# Initialize wandb
import wandb

wandb_project_name = 'FER2013_DPSGD_Binary_Classification'
wandb_run_name = 'MLP_d-1000-1000-1_Epsilon120'

# Login to wandb (uncomment if necessary)
# !wandb login

# Initialize wandb run
wandb.init(project=wandb_project_name)


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mrahul-nakka02[0m ([33mrahul-nakka02-indian-institute-of-technology-madras[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Custom Dataset for loading data from CSV files
class FER2013Dataset(Dataset):
    def __init__(self, csv_file, transform=None, max_samples_per_class=250):
        self.data = pd.read_csv(csv_file)
        self.transform = transform if transform is not None else transforms.ToTensor()
        
        # Filter data to include only labels 3 and 4 (Happy and Fear)
        self.data = self.data[self.data['emotion'].isin([3, 4])]
        
        # Map labels to binary labels: 0 (Happy) and 1 (Fear)
        self.label_mapping = {3: 0, 4: 1}
        self.data['emotion'] = self.data['emotion'].map(self.label_mapping)
        
        # Balance the dataset to have max_samples_per_class samples per class
        self.data = self._balance_data(max_samples_per_class)
        self.data = self.data.reset_index(drop=True)
        
    def _balance_data(self, max_samples_per_class):
        balanced_data = pd.DataFrame()
        for label in [0, 1]:
            class_data = self.data[self.data['emotion'] == label]
            sampled_data = class_data.sample(n=max_samples_per_class, random_state=seed)
            balanced_data = pd.concat([balanced_data, sampled_data], axis=0)
        return balanced_data
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        label = int(self.data.iloc[idx]['emotion'])
        pixels = np.array(self.data.iloc[idx]['pixels'].split(), dtype=np.float32).reshape(48, 48)
        
        if self.transform:
            pixels = self.transform(pixels)
        
        return pixels, label


In [4]:
# Paths to the training and test CSV files
train_csv = '/home/da23c014/PrivacyAI/CourseProject/datasets/fer2013/train.csv'  # Replace with the actual path
test_csv = '/home/da23c014/PrivacyAI/CourseProject/datasets/fer2013/test.csv'    # Replace with the actual path


# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    # Normalize the images if needed
    # transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Create datasets
train_dataset = FER2013Dataset(csv_file=train_csv, transform=transform, max_samples_per_class=250)
test_dataset = FER2013Dataset(csv_file=test_csv, transform=transform, max_samples_per_class=250)

# Define batch size
batch_size = 64

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

print(f'Train dataset size: {len(train_dataset)}')
print(f'Test dataset size: {len(test_dataset)}')

Train dataset size: 500
Test dataset size: 500


In [7]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dims, output_dim):
        super(MLP, self).__init__()
        layers = []
        # Input layer with bias
        layers.append(nn.Linear(input_dim, hidden_dims[0], bias=True))
        layers.append(nn.ReLU())
        # Hidden layers without bias
        for i in range(len(hidden_dims) - 1):
            layers.append(nn.Linear(hidden_dims[i], hidden_dims[i+1], bias=False))
            layers.append(nn.ReLU())
        # Output layer without bias
        layers.append(nn.Linear(hidden_dims[-1], output_dim, bias=False))
        self.net = nn.Sequential(*layers)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten input
        return self.net(x)


In [8]:
input_dim = 48 * 48  # Image size (48x48) flattened
hidden_dims = [1000, 1000]
output_dim = 1  # Binary classification output

model = MLP(input_dim, hidden_dims, output_dim).to(device)
print(model)


MLP(
  (net): Sequential(
    (0): Linear(in_features=2304, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=1000, bias=False)
    (3): ReLU()
    (4): Linear(in_features=1000, out_features=1, bias=False)
  )
)


In [16]:
from torch.func import functional_call, vmap, grad

# Loss function (Binary Cross Entropy with Logits)
def loss_fn(outputs, targets):
    return F.binary_cross_entropy_with_logits(outputs.squeeze(), targets.float())

# Compute per-sample gradients using functorch
def compute_per_sample_gradients(params, buffers, X_batch, y_batch):
    def compute_loss(param_dict, buffer_dict, sample, target):
        outputs = functional_call(model, (param_dict, buffer_dict), sample.unsqueeze(0))
        loss = loss_fn(outputs, target)
        return loss

    # Compute gradients per sample
    grad_fn = grad(compute_loss)
    per_sample_grads = vmap(grad_fn, in_dims=(None, None, 0, 0))(params, buffers, X_batch, y_batch)
    return per_sample_grads

# Compute l2 norms of per-sample gradients
def compute_l2_norm_per_gradient(per_sample_grads):
    l2_norms = torch.zeros(len(next(iter(per_sample_grads.values()))), device=device)
    for grad in per_sample_grads.values():
        l2_norms += torch.sum(grad.view(len(grad), -1) ** 2, dim=1)
    l2_norms = torch.sqrt(l2_norms)
    return l2_norms

# Clip gradients
def clip_gradients(per_sample_grads, l2_norms, max_norm):
    clipped_grads = {}
    for name in per_sample_grads.keys():
        grad = per_sample_grads[name]
        l2_norms_clipped = torch.clamp(l2_norms / max_norm, min=1.0)
        grad = grad / l2_norms_clipped.view(-1, *([1] * (grad.dim() - 1)))
        clipped_grads[name] = grad
    return clipped_grads

# DP-SGD update with gradient accumulation (if needed)
def dp_sgd_update(params, buffers, X_batch, y_batch, C, sigma, lr, batch_size):
    # Compute per-sample gradients
    per_sample_grads = compute_per_sample_gradients(params, buffers, X_batch, y_batch)
    # Compute l2 norms
    l2_norms = compute_l2_norm_per_gradient(per_sample_grads)
    # Clip gradients
    clipped_grads = clip_gradients(per_sample_grads, l2_norms, C)
    # Sum clipped gradients
    summed_grads = {}
    for name in clipped_grads.keys():
        summed_grads[name] = torch.sum(clipped_grads[name], dim=0)
    # Add noise
    for name in summed_grads.keys():
        noise = torch.normal(0, sigma * C, size=summed_grads[name].shape, device=summed_grads[name].device)
        summed_grads[name] += noise
    # Average gradients
    for name in summed_grads.keys():
        summed_grads[name] /= batch_size
    # Update parameters
    updated_params = {}
    for name in params.keys():
        updated_params[name] = params[name] - lr * summed_grads[name]
    return updated_params


In [22]:
# Privacy accounting using the RDP accountant
from opacus.accountants import RDPAccountant
from opacus.accountants.utils import get_noise_multiplier

# Parameters for DP
epsilon = 120
delta = 1e-5
C = 1.0  # Clipping norm
num_epochs = 1000000  # Adjusted to a practical number
steps = num_epochs * (len(train_dataset) // batch_size)

# Compute sampling rate
sampling_rate = batch_size / len(train_dataset)
print(f'Sampling rate: {sampling_rate:.4f}')

# Compute noise multiplier
sigma = get_noise_multiplier(
    target_epsilon=epsilon,
    target_delta=delta,
    sample_rate=sampling_rate,
    epochs=num_epochs,
)
print(f'Noise multiplier (sigma): {sigma:.4f}')


Sampling rate: 0.1280
Noise multiplier (sigma): 30.9949


In [23]:
# Evaluation function
def evaluate(model, params, buffers, data_loader):
    model.eval()
    total_correct = 0
    total_samples = 0
    total_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            y_batch = y_batch.view(-1)
            
            outputs = functional_call(model, (params, buffers), X_batch)
            loss = loss_fn(outputs, y_batch)
            total_loss += loss.item() * X_batch.size(0)
            
            preds = (torch.sigmoid(outputs.squeeze()) > 0.5).long()
            total_correct += (preds == y_batch.long()).sum().item()
            total_samples += y_batch.size(0)
    accuracy = total_correct / total_samples
    avg_loss = total_loss / total_samples
    return accuracy, avg_loss


In [None]:
# Initialize parameters and buffers
params = {k: v.detach().clone() for k, v in model.named_parameters()}
buffers = {k: v.detach().clone() for k, v in model.named_buffers()}

# Training parameters
lr = 0.01  # Learning rate
batch_size = 64  # Ensure batch_size matches the DataLoader
C = 1.0     # Clipping norm (already defined)
sigma = sigma  # Noise multiplier (already computed)

# Training loop
model.train()
for epoch in range(1, num_epochs + 1):
    epoch_loss = 0.0
    epoch_correct = 0
    total_samples = 0
    for X_batch, y_batch in train_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        y_batch = y_batch.view(-1)
        
        # Move data to correct dtype
        X_batch = X_batch.to(torch.get_default_dtype())
        y_batch = y_batch.to(torch.get_default_dtype())
        
        # Perform DP-SGD update
        params = dp_sgd_update(params, buffers, X_batch, y_batch, C=C, sigma=sigma, lr=lr, batch_size=batch_size)
        
        # Calculate loss for logging
        outputs = functional_call(model, (params, buffers), X_batch)
        loss = loss_fn(outputs, y_batch)
        epoch_loss += loss.item() * X_batch.size(0)
        
        # Calculate accuracy
        preds = (torch.sigmoid(outputs.squeeze()) > 0.5).long()
        epoch_correct += (preds == y_batch.long()).sum().item()
        total_samples += y_batch.size(0)
    
    # Calculate average loss and accuracy
    avg_loss = epoch_loss / total_samples
    accuracy = epoch_correct / total_samples
    
    print(f'Epoch {epoch}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy * 100:.2f}%')
    
    # Log metrics to wandb
    wandb.log({
        'epoch': epoch,
        'train_loss': avg_loss,
        'train_accuracy': accuracy,
    })
    
    # Evaluate on test set
    test_accuracy, test_loss = evaluate(model, params, buffers, test_loader)
    print(f'Test Accuracy: {test_accuracy * 100:.2f}%, Test Loss: {test_loss:.4f}')
    
    # Log test metrics to wandb
    wandb.log({
        'test_accuracy': test_accuracy,
        'test_loss': test_loss,
    })


Epoch 1/1000000, Loss: 4.9697, Accuracy: 48.20%
Test Accuracy: 52.00%, Test Loss: 7.9331
Epoch 2/1000000, Loss: 8.3672, Accuracy: 49.80%
Test Accuracy: 54.80%, Test Loss: 10.2924
Epoch 3/1000000, Loss: 14.1127, Accuracy: 47.20%
Test Accuracy: 49.20%, Test Loss: 10.1367
Epoch 4/1000000, Loss: 13.6496, Accuracy: 52.00%
Test Accuracy: 49.00%, Test Loss: 13.3215
Epoch 5/1000000, Loss: 16.1799, Accuracy: 51.60%
Test Accuracy: 47.80%, Test Loss: 26.7555
Epoch 6/1000000, Loss: 33.6023, Accuracy: 51.20%
Test Accuracy: 50.80%, Test Loss: 34.8006
Epoch 7/1000000, Loss: 27.2995, Accuracy: 53.40%
Test Accuracy: 46.80%, Test Loss: 28.6103
Epoch 8/1000000, Loss: 27.1848, Accuracy: 48.40%
Test Accuracy: 50.00%, Test Loss: 33.8056
Epoch 9/1000000, Loss: 59.6155, Accuracy: 52.80%
Test Accuracy: 51.00%, Test Loss: 45.3775
Epoch 10/1000000, Loss: 44.7019, Accuracy: 51.40%
Test Accuracy: 46.20%, Test Loss: 49.0593
Epoch 11/1000000, Loss: 43.9187, Accuracy: 55.80%
Test Accuracy: 45.60%, Test Loss: 55.7010


Exception ignored in: <function _releaseLock at 0x7fa505cd3ee0>
Traceback (most recent call last):
  File "/home/da23c014/miniconda3/envs/new/lib/python3.9/logging/__init__.py", line 227, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


Test Accuracy: 51.40%, Test Loss: 91476.1804
Epoch 2396/1000000, Loss: 72716.8702, Accuracy: 56.80%
Test Accuracy: 50.00%, Test Loss: 93822.5952
Epoch 2397/1000000, Loss: 74815.0691, Accuracy: 57.40%
Test Accuracy: 50.00%, Test Loss: 93343.2141
Epoch 2398/1000000, Loss: 74342.4206, Accuracy: 57.40%
Test Accuracy: 51.00%, Test Loss: 95726.5032
Epoch 2399/1000000, Loss: 74571.4916, Accuracy: 57.00%
Test Accuracy: 50.00%, Test Loss: 93495.4395
Epoch 2400/1000000, Loss: 74117.9142, Accuracy: 56.60%
Test Accuracy: 49.00%, Test Loss: 93296.9836
Epoch 2401/1000000, Loss: 74118.7719, Accuracy: 56.00%
Test Accuracy: 49.60%, Test Loss: 93879.4031
Epoch 2402/1000000, Loss: 73205.4593, Accuracy: 58.00%
Test Accuracy: 49.40%, Test Loss: 93831.1657
Epoch 2403/1000000, Loss: 75351.2684, Accuracy: 55.80%
Test Accuracy: 49.20%, Test Loss: 95322.8390
Epoch 2404/1000000, Loss: 77012.8364, Accuracy: 55.60%
Test Accuracy: 49.00%, Test Loss: 96836.4426
Epoch 2405/1000000, Loss: 76222.4866, Accuracy: 55.80%


In [None]:
!nvidia-smi