<a href="https://colab.research.google.com/github/mtrefilek/cs762/blob/main/Private_Classifier_(Opacus).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Differential-privacy and optimisation hyper-parameters for this run.
MAX_GRAD_NORM = 1.2  # passed as max_grad_norm to the privacy engine
EPSILON = .5  # passed as target_epsilon to the privacy engine
DELTA = 1e-5  # passed as target_delta, and to get_epsilon when reporting
EPOCHS = 20  # number of training epochs; also given to the privacy engine
LR = .1  # learning rate handed to the SGD optimizer

In [None]:
BATCH_SIZE = 64  # batch_size of both the train and the test DataLoader
MAX_PHYSICAL_BATCH_SIZE = 64  # max_physical_batch_size given to BatchMemoryManager in dptrain

In [None]:
# Name of the feature extractor whose pre-computed features are loaded;
# the commented-out lines are alternative choices.
#FEATURE_EXTRACTOR_NAME= 'clip-vit-base-patch32'
FEATURE_EXTRACTOR_NAME = 'vit-base-patch32-384'
#FEATURE_EXTRACTOR_NAME = 'scatternet'
# Dataset whose features are loaded; together with FEATURE_EXTRACTOR_NAME it forms the feature-file name.
DSET_NAME = 'EuroSAT' #('MNIST', 'FMNIST', 'CIFAR10', 'CIFAR100', 'PlantDisease', 'EuroSAT', 'ChestXRay')

In [None]:
import numpy as np
import torch, os
import torchmetrics
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm.notebook import tqdm
from opacus import PrivacyEngine
from torch.utils.data import DataLoader, TensorDataset
from opacus.utils.batch_memory_manager import BatchMemoryManager
from opacus.layers.dp_multihead_attention import DPMultiheadAttention
torch.manual_seed(0)
np.random.seed(0)

In [None]:
## Models
class LinearNet(nn.Module):
    """Single fully connected layer mapping ``d_in`` features to ``d_out`` outputs."""

    def __init__(self, d_in, d_out):
        """Store the two dimensions and build the ``nn.Linear`` layer."""
        super().__init__()
        self.d_in, self.d_out = d_in, d_out
        self.linear = nn.Linear(in_features=d_in, out_features=d_out)

    def forward(self, x):
        """Return the output of the linear layer applied to ``x``."""
        projected = self.linear(x)
        return projected
    
class LinearAdjustedNet(nn.Module):
    """Bank of ``l`` linear heads, each fed one contiguous slice of the input.

    The ``d_in`` input features are cut along dimension 1 into ``l``
    consecutive chunks whose widths are stored in ``self.d_ms``. Every chunk
    has width ``ceil(d_in / l)`` except, when ``d_in`` is not divisible by
    ``l``, the last one, which takes whatever remains. Entries of the input
    whose absolute value is below ``th`` are zeroed before slicing.

    Args:
        d_in: number of input features.
        d_out: number of outputs produced by every head.
        l: number of heads / input chunks.
        th: magnitude threshold; entries with ``|x| < th`` are set to zero.
    """

    def __init__(self, d_in, d_out, l, th):
        super().__init__()
        self.d_in = d_in
        self.d_out = d_out
        self.l = l
        self.th = th
        # -(d_in // -l) is ceil(d_in / l) using integer arithmetic only.
        self.d_ms = [-(d_in // -l) for _ in range(l)]
        if d_in % l != 0:
            # The last chunk receives the remainder so the widths sum to d_in.
            self.d_ms[-1] = d_in - self.d_ms[0] * (l - 1)
        self.linears = nn.ModuleList([nn.Linear(d_m, d_out) for d_m in self.d_ms])

    def forward(self, x):
        """Apply every head to its own chunk and stack the results on a new last axis.

        The input tensor is not modified. For a 2-D input the result has one
        trailing axis of length ``l`` appended to the per-head output.
        """
        mask = (torch.abs(x) >= self.th).type_as(x)
        # Out-of-place multiply: the caller's tensor must stay untouched.
        x = x * mask
        # Split by the actual chunk widths so the (possibly shorter) last
        # chunk starts at the right column instead of at i * d_ms[i].
        chunks = torch.split(x, self.d_ms, dim=1)
        return torch.stack(
            [head(chunk) for head, chunk in zip(self.linears, chunks)], dim=-1
        )
    
class DPMiniAttentionNet(nn.Module):
    """Self-attention over ``l`` equal slices of the input, followed by a linear layer.

    The ``d_in`` features are cut into ``l`` consecutive slices of width
    ``embed_dim = d_in // l``; the slices are stacked on a new leading axis and
    passed as query, key and value to a ``DPMultiheadAttention`` layer. The
    attended slices are concatenated back to width ``d_in`` and mapped to
    ``d_out`` by ``self.linear``.

    Args:
        d_in: number of input features; must be divisible by ``l``.
        d_out: number of outputs of the final linear layer.
        l: number of slices the input is cut into.
        th: stored on the instance; the thresholding that would use it is
            currently disabled in ``forward``.
        num_heads: number of attention heads given to ``DPMultiheadAttention``.
    """

    def __init__(self, d_in, d_out, l, th, num_heads):
        super().__init__()
        assert d_in % l == 0, "d_in should be dividable by l."
        self.d_in = d_in
        self.d_out = d_out
        self.l = l
        self.th = th
        self.embed_dim = d_in // l
        self.num_heads = num_heads
        self.linear = nn.Linear(d_in, d_out)
        self.attention = DPMultiheadAttention(embed_dim=self.embed_dim, num_heads=self.num_heads)

        #self.modules = nn.ModuleDict({'linear': nn.Linear(d_in, d_out),
        #                             'attention': nn.MultiheadAttention(embed_dim=self.embed_dim,
        #                                                                num_heads=self.num_heads,
        #                                                                batch_first=True)})

    def forward(self, x):
        """Return the linear layer's output for ``x``.

        Assumes ``x`` is 2-D with ``d_in`` columns (it is sliced along
        dimension 1) - TODO confirm against callers.
        """
#         mask = (torch.abs(x) >= self.th).type_as(x)
#         x *= mask
        # Stack the l slices on a new leading axis (batch_first=False layout).
        x = torch.stack([x[:,i*self.embed_dim:(i+1)*self.embed_dim] for i in range(self.l)], dim=0)
        # Call the module itself rather than .forward() so that nn.Module's
        # __call__ machinery (including any registered hooks) is not bypassed.
        x = self.attention(x, x, x, need_weights=False)
        # Only the first element of the returned value is used below.
        x = self.linear(torch.cat(x[0].unbind(0),dim=-1).unsqueeze(0))  ## batch_first=False unsqueeze->
                                                                        ## squeeze trick is needed
                                                                        ## see: https://githubmemory.com/repo/pytorch/opacus/issues/158
        #x = self.linear(torch.flatten(x[0], 1, -1)) ## batch_first=True
        return x.squeeze(0)

In [None]:
def accuracy(preds, labels):
    """Return the fraction of positions at which ``preds`` equals ``labels``.

    Both arguments are compared element-wise with ``==`` and the result's
    ``.mean()`` is returned, so the comparison result must provide ``.mean()``
    (e.g. NumPy arrays).
    """
    matches = preds == labels
    return matches.mean()

def dptrain(model, train_loader, optimizer, epoch, device):
    """Run one training epoch under ``BatchMemoryManager`` and report loss and epsilon.

    Relies on the module-level names ``MAX_PHYSICAL_BATCH_SIZE``, ``DELTA`` and
    ``privacy_engine``.

    Args:
        model: module to train; called as ``model(images)``.
        train_loader: data loader wrapped by ``BatchMemoryManager``; yields
            ``(images, target)`` pairs.
        optimizer: optimizer handed to ``BatchMemoryManager`` and stepped once
            per yielded batch.
        epoch: epoch number, used only in the printed summary.
        device: device the batches are moved to.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()

    losses = []

    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer,
    ) as memory_safe_data_loader:

        for images, target in memory_safe_data_loader:
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            # A 3-D output carries one prediction per head on its last axis;
            # the target is then repeated for every head. An explicit rank
            # check replaces the former bare ``except`` so that genuine errors
            # raised by the loss are no longer swallowed.
            if output.dim() == 3:
                n_heads = output.shape[2]
                loss = criterion(output, target.unsqueeze(1).expand(-1, n_heads))
            else:
                loss = criterion(output, target)

            losses.append(loss.item())

            loss.backward()
            optimizer.step()

        epsilon = privacy_engine.get_epsilon(DELTA)
        print(f"\tTrain Epoch: {epoch} "
              f"Loss: {np.mean(losses):.6f} "
              f"(ε = {epsilon:.2f}, δ = {DELTA})")
        
def train(model, train_loader, optimizer, epoch, device):
    """Run one ordinary (non-private) training epoch and print the mean batch loss.

    Args:
        model: module to train; called as ``model(images)``.
        train_loader: iterable yielding ``(images, target)`` pairs.
        optimizer: optimizer that is zeroed and stepped once per batch.
        epoch: epoch number, used only in the printed summary.
        device: device the batches are moved to.
    """
    model.train()
    criterion = nn.CrossEntropyLoss()

    losses = []

    for images, target in train_loader:
        optimizer.zero_grad()
        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)
        # A 3-D output carries one prediction per head on its last axis; the
        # target is then repeated for every head. An explicit rank check
        # replaces the former bare ``except`` so that genuine errors raised by
        # the loss are no longer swallowed.
        if output.dim() == 3:
            n_heads = output.shape[2]
            loss = criterion(output, target.unsqueeze(1).expand(-1, n_heads))
        else:
            loss = criterion(output, target)

        losses.append(loss.item())

        loss.backward()
        optimizer.step()

    print(f"\tTrain Epoch: {epoch} "
          f"Loss: {np.mean(losses):.6f} "
         )
        
def test(model, test_loader, device):
    """Evaluate ``model`` on ``test_loader`` and print loss, accuracy and AUROC.

    Relies on the module-level names ``N_CLS`` and ``accuracy``.

    Loss and accuracy are averaged over samples: every batch is weighted by
    its size, so a smaller final batch does not count as much as a full one.

    Args:
        model: module to evaluate; called as ``model(images)``.
        test_loader: iterable yielding ``(images, target)`` pairs.
        device: device the batches are moved to.

    Returns:
        Tuple ``(top1_accuracy, auc)`` where ``top1_accuracy`` is the fraction
        of correctly classified samples and ``auc`` is the value returned by
        the AUROC metric's ``compute()``.
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []
    batch_sizes = []
    auroc = torchmetrics.AUROC()
    # NOTE(review): num_classes is assigned after construction, as before;
    # confirm this matches the installed torchmetrics version's API.
    auroc.num_classes = N_CLS

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            # A 3-D output carries one prediction per head on its last axis:
            # the target is repeated per head for the loss and the softmax
            # scores are averaged over heads. An explicit rank check replaces
            # the former bare ``except`` so real errors are not swallowed.
            if output.dim() == 3:
                n_heads = output.shape[2]
                loss = criterion(output, target.unsqueeze(1).expand(-1, n_heads))
                score = torch.mean(F.softmax(output, dim=1), dim=-1)
            else:
                loss = criterion(output, target)
                score = F.softmax(output, dim=1)

            auroc.update(score, target)
            preds = np.argmax(score.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)
            batch_sizes.append(target.shape[0])

    if sum(batch_sizes):
        # Weight per-batch means by batch size to obtain per-sample means.
        mean_loss = np.average(losses, weights=batch_sizes)
        top1_avg = np.average(top1_acc, weights=batch_sizes)
    else:
        mean_loss = top1_avg = np.float64("nan")
    auc = auroc.compute()

    print(f"\tTest set:"
          f"Loss: {mean_loss:.6f} "
          f"Acc: {top1_avg * 100:.6f} "
          f"AUC: {auc:.6f} ")
    return top1_avg, auc

In [None]:
# Path prefix (no file extension) of the pre-extracted feature archive(s),
# built from the current working directory with forward slashes throughout.
_cwd_posix = os.getcwd().replace('\\', '/')
dset_path = f"{_cwd_posix}/extracted_features/{DSET_NAME}_{FEATURE_EXTRACTOR_NAME}"

In [None]:
# Load the per-class feature matrices and build flat train/test arrays.
if DSET_NAME in ('PlantDisease', 'EuroSAT'):
    # One archive only: shuffle every class and hold out the first tenth
    # (integer division) of its rows as the test split.
    # NOTE: allow_pickle=True unpickles objects from the file; only load trusted archives.
    feature_set = np.load(dset_path+'.npz', allow_pickle=True)
    X_tr, X_tst = [], []
    for class_feats in feature_set['feature_matrices']:
        n_holdout = len(class_feats) // 10
        shuffled = np.random.permutation(class_feats)
        X_tst.append(shuffled[:n_holdout])
        X_tr.append(shuffled[n_holdout:])
else:
    # Separate archives for the train and the test split.
    # NOTE: allow_pickle=True unpickles objects from the file; only load trusted archives.
    feature_set = np.load(dset_path+'_train.npz', allow_pickle=True)
    feature_set_tst = np.load(dset_path+'_test.npz', allow_pickle=True)
    X_tr = list(feature_set['feature_matrices'])
    X_tst = list(feature_set_tst['feature_matrices'])

# Per-class row counts, totals, number of classes and feature dimension.
class_sizes_tr = [len(class_feats) for class_feats in X_tr]
class_sizes_tst = [len(class_feats) for class_feats in X_tst]
n_tr = np.sum(class_sizes_tr)
n_tst = np.sum(class_sizes_tst)
N_CLS = len(X_tr)
D_DIM = len(X_tr[0][0])

# Stack all classes into one matrix per split.
X_tr = np.vstack(X_tr)
X_tst = np.vstack(X_tst)

# Labels follow the stacking order: class index i repeated once per row of class i
# (kept as float64, matching a zero-initialised array filled per class).
y_tr = np.repeat(np.arange(len(class_sizes_tr), dtype=np.float64), class_sizes_tr)
y_tst = np.repeat(np.arange(len(class_sizes_tst), dtype=np.float64), class_sizes_tst)

In [None]:
# Training split as float32 features / int64 labels, reshuffled by the loader.
trainset = TensorDataset(
    torch.tensor(X_tr, dtype=torch.float32),
    torch.tensor(y_tr, dtype=torch.long),
)
train_loader = DataLoader(dataset=trainset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Test split as float32 features / int64 labels, iterated in fixed order.
testset = TensorDataset(
    torch.tensor(X_tst, dtype=torch.float32),
    torch.tensor(y_tst, dtype=torch.long),
)
test_loader = DataLoader(dataset=testset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
## Specify the model
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
#device = torch.device("cpu")
# Build the chosen architecture directly on the target device;
# the commented-out lines are the alternative models defined in this file.
model = LinearNet(D_DIM, N_CLS).to(device)
#model = LinearAdjustedNet(D_DIM, N_CLS, 16, .1).to(device)
#model = DPMiniAttentionNet(D_DIM, N_CLS, 16, .1, 4).to(device)

In [None]:
## Loss & Optimizer
# Module-level loss object; note that dptrain, train and test each build
# their own CrossEntropyLoss and do not read this one.
criterion = nn.CrossEntropyLoss()
# Plain SGD over all model parameters with learning rate LR.
optimizer = optim.SGD(model.parameters(), lr=LR)
#optimizer = optim.RMSprop(model.parameters(), lr=LR)

In [None]:
# Only DPMiniAttentionNet uses the batch_first=False layout; every other
# model keeps the batch on the first axis. The check is by class name.
batch_first = model.__class__.__name__ != 'DPMiniAttentionNet'

In [None]:
## Model Check
# from opacus.validators import ModuleValidator
# errors = ModuleValidator.validate(model, strict=False)
# errors[-5:]
# model = ModuleValidator.fix(model)
# ModuleValidator.validate(model, strict=False)

In [None]:
# Privacy Engine
privacy_engine = PrivacyEngine()

# Wrap model, optimizer and loader for DP training towards the target
# (EPSILON, DELTA) over EPOCHS epochs. The three returned objects replace /
# accompany the originals: `model` and `optimizer` are rebound, while the
# returned loader is bound to the separate name `data_loader`.
# NOTE(review): presumably `data_loader` (not the original `train_loader`) is
# the loader training should iterate over - confirm against the Opacus docs.
model, optimizer, data_loader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM,
    batch_first=batch_first,
)

# Report the noise multiplier exposed by the wrapped optimizer and the clipping norm.
print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

Using sigma=1.9133447265624999 and C=1.2




In [None]:
# Show the class name of the current `model` object (the recorded output is 'GradSampleModule').
model.__class__.__name__

'GradSampleModule'

In [None]:
# Train for EPOCHS epochs on `data_loader`, the loader returned by
# make_private_with_epsilon, rather than on the original `train_loader`:
# the privacy accounting is set up for the returned loader's sampling, so
# iterating the untouched shuffle-based loader would not match it.
for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
    dptrain(model, data_loader, optimizer, epoch + 1, device)
    #train(model, train_loader, optimizer, epoch + 1, device)

Epoch:   0%|          | 0/20 [00:00<?, ?epoch/s]



	Train Epoch: 1 Loss: 0.961169 (ε = 0.18, δ = 1e-05)
	Train Epoch: 2 Loss: 0.511725 (ε = 0.20, δ = 1e-05)
	Train Epoch: 3 Loss: 0.434339 (ε = 0.22, δ = 1e-05)
	Train Epoch: 4 Loss: 0.398961 (ε = 0.24, δ = 1e-05)
	Train Epoch: 5 Loss: 0.383174 (ε = 0.26, δ = 1e-05)
	Train Epoch: 6 Loss: 0.370495 (ε = 0.27, δ = 1e-05)
	Train Epoch: 7 Loss: 0.369004 (ε = 0.29, δ = 1e-05)
	Train Epoch: 8 Loss: 0.367678 (ε = 0.31, δ = 1e-05)
	Train Epoch: 9 Loss: 0.362459 (ε = 0.33, δ = 1e-05)
	Train Epoch: 10 Loss: 0.357179 (ε = 0.35, δ = 1e-05)
	Train Epoch: 11 Loss: 0.357180 (ε = 0.36, δ = 1e-05)
	Train Epoch: 12 Loss: 0.359430 (ε = 0.38, δ = 1e-05)
	Train Epoch: 13 Loss: 0.356147 (ε = 0.40, δ = 1e-05)
	Train Epoch: 14 Loss: 0.354013 (ε = 0.41, δ = 1e-05)
	Train Epoch: 15 Loss: 0.354690 (ε = 0.43, δ = 1e-05)
	Train Epoch: 16 Loss: 0.355120 (ε = 0.44, δ = 1e-05)
	Train Epoch: 17 Loss: 0.356341 (ε = 0.46, δ = 1e-05)
	Train Epoch: 18 Loss: 0.357066 (ε = 0.47, δ = 1e-05)
	Train Epoch: 19 Loss: 0.361582 (ε = 

In [None]:
# Evaluate on the held-out loader; test() prints loss/accuracy/AUC and returns (accuracy, auc).
acc, auc = test(model, test_loader, device)

	Test set:Loss: 0.383763 Acc: 89.886143 AUC: 0.991445 




In [None]:
# Query the privacy engine for the epsilon spent so far at the chosen DELTA.
privacy_engine.get_epsilon(DELTA)

0.49978663601343887