In [None]:
# import libraries
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import time
import torch.optim as optim
import os
from opacus import PrivacyEngine
import math
import random
from opacus import GradSampleModule
from opacus.validators import ModuleValidator

In [2]:
# Select the compute device: the GPU when CUDA is available, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# model
def conv3x3(in_channels, out_channels, stride=1):
    """
    Build a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Parameters:
    - in_channels: number of input channels.
    - out_channels: number of output channels.
    - stride: stride of the convolution (default 1).

    Returns:
    - The configured ``nn.Conv2d`` layer.
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_channels, out_channels, **conv_options)


class ResidualBlock(nn.Module):
    """
    Basic residual block made of two 3x3 convolutions.

    The main path is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    The (optionally transformed) input is then added to that result and a
    final ReLU is applied.

    Parameters:
    - in_channels: number of channels of the input feature map.
    - out_channels: number of channels produced by both convolutions.
    - stride: stride of the first convolution (the second uses the default of 1).
    - downsample: optional module applied to the input before the addition;
      when None the input is added unchanged.
    """
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return relu(main_path(x) + shortcut(x))."""
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Explicit None check: the truthiness of an nn.Module container depends
        # on its length, which is not the question being asked here.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

In [None]:

# model
class ResNet(nn.Module):
    """
    Three-stage residual network.

    A 3x3 stem convolution (3 -> 16 channels) with BatchNorm and ReLU is
    followed by three stages of residual blocks producing 16, 32 and 64
    channels (the last two stages are built with stride 2), an 8x8 average
    pooling, and a linear classifier on the flattened result.

    Parameters:
    - block: callable used to build each residual block; it is called as
      ``block(in_channels, out_channels, stride, downsample)`` for the first
      block of a stage and ``block(out_channels, out_channels)`` afterwards.
    - layers: sequence giving the number of blocks in each of the three stages.
    - num_classes: size of the classifier output (default 10).
    """
    def __init__(self, block, layers, num_classes=10):
        super().__init__()
        # Tracks the channel count feeding the next stage; updated by make_layer.
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        first_depth, second_depth, third_depth = layers[0], layers[1], layers[2]
        self.layer1 = self.make_layer(block, 16, first_depth)
        self.layer2 = self.make_layer(block, 32, second_depth, 2)
        self.layer3 = self.make_layer(block, 64, third_depth, 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """
        Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride`` and, when the stride is not 1
        or the channel count changes, a conv3x3 + BatchNorm shortcut.
        """
        needs_projection = (stride != 1) or (self.in_channels != out_channels)
        if needs_projection:
            shortcut = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        else:
            shortcut = None

        stage = [block(self.in_channels, out_channels, stride, shortcut)]
        self.in_channels = out_channels
        stage.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem, the three stages, pooling and the classifier on ``x``."""
        out = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        pooled = self.avg_pool(out)
        # Collapse everything except the batch dimension before the classifier.
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)

In [None]:
# Build the network (three stages of three residual blocks) and move it to the device
model = ResNet(block=ResidualBlock, layers=[3, 3, 3])
model = model.to(device)

# Optimizer: SGD with momentum over all model parameters
optimizer = optim.SGD(params=model.parameters(), lr=0.01, momentum=0.9)

# Cost function: cross-entropy
criterion = nn.CrossEntropyLoss()

In [6]:
# Datasets and preprocessing pipelines

# Per-channel normalization shared by the train and test pipelines
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: pad, random rotation, random flip, random 32x32 crop,
# then conversion to a tensor and normalization
train_transform = transforms.Compose([
    transforms.Pad(padding=4),
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(size=32),
    transforms.ToTensor(),
    normalize,
])

# Test pipeline: tensor conversion and normalization only
test_transform = transforms.Compose([transforms.ToTensor(), normalize])

# CIFAR-10 splits stored under data/; only the training split requests a download
train_dataset = torchvision.datasets.CIFAR10(
    root='data/',
    train=True,
    transform=train_transform,
    download=True,
)
test_dataset = torchvision.datasets.CIFAR10(
    root='data/',
    train=False,
    transform=test_transform,
)

Files already downloaded and verified


In [None]:
# N: number of samples in the training set
N = len(train_dataset)

In [None]:
# Hyper-parameters used by the training procedure

# Sensitivity that scales the Gaussian noise standard deviation
sensitivity = 1.0

# Noise multipliers: sigma_N for the dataset size N, sigma_K for K (sum of grad norms)
sigma_N = sigma_K = 0.02 * N

# C as given in reference [2]
C = 0.1

# External bound C*, set to 4*C as per the paper
C_star = 4 * C

# gl: lower bound applied to a gradient norm when computing sampling weights
gl = 0.0001

# k in lines 11 and 24 of the algorithm
k_constant = 5

# Expected batch size
b = 16

# lambda
quantile = 1


In [10]:
# function to add noise to a scalar

def add_gaussian_noise(scalar, noise_multiplier):
    """
    Adds zero-mean Gaussian noise to a scalar.

    Parameters:
    - scalar: The scalar value to which noise will be added.
    - noise_multiplier: Factor controlling the noise scale; the standard
      deviation is ``noise_multiplier * sensitivity``. May be a Python number
      or a single-element tensor.

    Returns:
    - The scalar value with added Gaussian noise (a 0-dim tensor, because the
      noise is drawn with ``torch.normal``).
    """
    # torch.normal's (mean, std, size) overload needs plain Python floats, so a
    # tensor-valued multiplier (e.g. one derived from a noisy tensor) is
    # converted first; float() accepts numbers and single-element tensors.
    sigma = float(noise_multiplier) * sensitivity

    # Draw one sample with mean=0 and std=noise_multiplier*sensitivity
    noise = torch.normal(mean=0.0, std=sigma, size=())

    # Add the noise to the scalar
    noisy_scalar = scalar + noise

    return noisy_scalar

In [None]:
N_tilda = add_gaussian_noise(N, sigma_N)  # noisy dataset size

epochs = 40  # number of epochs

iterations = math.ceil(N / b)  # iterations per epoch, rounded up

In [None]:
def _sum_of_param_grad_norms(module):
    """Return the sum of the norms of the gradients currently stored on ``module``'s parameters."""
    total = 0.0
    for param in module.parameters():
        if param.grad is not None:
            total += param.grad.norm().item()
    return total


def _backprop_single_sample(image_tensor, label):
    """Run a forward/backward pass on one (image, label) pair, leaving its gradients on ``model``."""
    image = image_tensor.unsqueeze(0).to(device)  # add the batch dimension
    target = torch.tensor(label, dtype=torch.long).unsqueeze(0).to(device)

    # Zero gradients so they reflect this data point only
    model.zero_grad()
    loss = criterion(model(image), target)
    loss.backward()


for e in range(epochs):
    # One bookkeeping row per data point:
    #   [0] image tensor          [1] label
    #   [2] grad norm after clipping with the C-based bound
    #   [3] grad norm after clipping with the C_star-based bound
    #   [4] sampling weight k_constant * max(grad norm, gl)
    #   [5] min(b * weight / K_cap_list[t], 1)
    #   [6] grad norm / previous weight (-1.0 until the point has been drawn)
    #   [7] index in train_dataset
    data_structure = []
    for i in range(len(train_dataset)):
        image_tensor, label = train_dataset[i]
        data_structure.append([image_tensor, label, 0.0, 0.0, 0.0, 0.0, -1.0, i])

    K_cap_star = 0
    model.train()
    K = 0
    K_cap_list = []
    K_cap = 0  # currently unused

    for t in range(iterations):

        if t == 0:
            # First iteration: one pass over every data point to initialise
            # the per-sample norms and weights.
            for row in data_structure:
                _backprop_single_sample(row[0], row[1])

                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1/C)
                # NOTE(review): this is the sum of per-parameter norms, which is
                # not the total norm that clip_grad_norm_ bounds - confirm intent.
                grad_norm = _sum_of_param_grad_norms(model)

                row[2] = grad_norm
                row[4] = k_constant * max(grad_norm, gl)
                K += grad_norm

                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1/C_star)
                row[3] = _sum_of_param_grad_norms(model)

            # Noisy K for iteration 0, then each later entry adds fresh noise
            # to the previous one.
            K_cap_list.append(add_gaussian_noise(K, sigma_K))
            for _ in range(iterations - 1):
                K_cap_list.append(add_gaussian_noise(K_cap_list[-1], sigma_K))

        for row in data_structure:
            row[5] = min(b * row[4] / K_cap_list[t], 1)

        # Draw k_constant*b candidate indices with probability proportional to the weights
        p_q = [row[4] for row in data_structure]
        probabilities_q = np.array(p_q) / np.sum(p_q)
        chi_q_idxs = np.random.choice(len(data_structure), size=k_constant*b, replace=False, p=probabilities_q)

        for j in chi_q_idxs:
            row = data_structure[j]
            _backprop_single_sample(row[0], row[1])

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1/min(abs(row[4]), C))
            grad_norm = _sum_of_param_grad_norms(model)

            row[2] = grad_norm
            row[6] = grad_norm / row[4]  # uses the weight from before this refresh
            row[4] = k_constant * max(row[2], gl)

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1/C_star)
            # Accumulate into a separate value: the original added to grad_norm
            # here, so column 3 was always written as 0.0.
            row[3] = _sum_of_param_grad_norms(model)

        # Every row that has been drawn so far in this epoch
        chi_q = [row for row in data_structure if row[6] != -1.0]
        p_p = np.array([row[6] for row in chi_q])
        probabilities_p = p_p / np.sum(p_p)

        # np.random.choice needs a 1-D population, so sample positions in chi_q
        # and gather the rows afterwards. It raises if fewer than b entries of
        # probabilities_p are non-zero.
        chi_p_idxs = np.random.choice(len(chi_q), size=b, replace=False, p=probabilities_p)
        chi_p = [chi_q[idx] for idx in chi_p_idxs]

        # TODO: compute gradient for selected batch (line 26)
        # TODO: optimize (line 27)
        print("Executed till here")

    # Sum the C_star-clipped norms over all data points
    for row in data_structure:
        K_cap_star += row[3]

    K_cap_star = add_gaussian_noise(K_cap_star, 0.02*N*C)
    # Keep C a plain float: K_cap_star and N_tilda are tensors, and a tensor C
    # would flow into the clipping bounds and the noise scale of the next epoch.
    C = float(K_cap_star / N_tilda) * quantile
        

KeyboardInterrupt: 

In [None]:
def evaluate(model, test_loader, device, epoch):
    """
    Evaluate the model on the test set and save a checkpoint of its weights.

    Parameters:
    - model: the network to evaluate. If it exposes a ``module`` attribute
      (i.e. it is a wrapper around another module), the wrapped module's
      weights are saved; otherwise the model's own weights are saved.
    - test_loader: iterable yielding (images, labels) batches.
    - device: device the batches are moved to before the forward pass.
    - epoch: value embedded in the checkpoint file name.

    Returns:
    - The accuracy on the test set, in percent.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

    # Save model checkpoint. A plain nn.Module has no ``.module`` attribute, so
    # only unwrap when the attribute is actually present.
    unwrapped = getattr(model, 'module', model)
    # torch.save does not create missing directories.
    os.makedirs('model_weight', exist_ok=True)
    torch.save(unwrapped.state_dict(), f'model_weight/resnet_epoch_{epoch}_acc_{accuracy:.2f}.ckpt')

    return accuracy
