<a href="https://colab.research.google.com/github/mgozon/DLG-UROP/blob/main/Batch_DLG_Iris_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Batch-DLG - Iris Dataset
This notebook modifies the code in [Deep Leakage from Gradients](https://gist.github.com/Lyken17/91b81526a8245a028d4f85ccc9191884) to work with the Iris Dataset. In addition, it explores whether it is possible to repeat the same procedure on the batch input gradient.

In [69]:
# setting up libraries and device
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad
torch.manual_seed(100) # for generating the same random weights

# for testing
from random import randint
from random import shuffle
from itertools import permutations

from sklearn.datasets import load_iris
dst = load_iris()

print(torch.__version__)
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print("Running on %s" % device)

1.12.1+cu113
Running on cuda


In [70]:
# auxiliary functions for NN - conver to onehot and loss function
def label_to_onehot(target, num_classes = 3):
    target = torch.unsqueeze(target, 1)
    onehot_target = torch.zeros(target.size(0), num_classes, device=target.device)
    onehot_target.scatter_(1, target, 1)
    return onehot_target

def cross_entropy_for_onehot(pred, target):
    return torch.mean(torch.sum(- target * F.log_softmax(pred, dim=-1), 1))

In [71]:
# a random fully connected neural network with random weights and biases
def weights_init(m):
    if hasattr(m, "weight"):
        m.weight.data.uniform_(-0.5, 0.5)
    if hasattr(m, "bias"):
        m.bias.data.uniform_(-0.5, 0.5)
    
class FcNet(nn.Module):
    def __init__(self):
        super(FcNet, self).__init__()
        act = nn.Sigmoid
        self.body = nn.Sequential(
            nn.Linear(4, 100),
            act(),
            nn.Linear(100, 100),
            act(),
            nn.Linear(100, 100),
            act(),
            nn.Linear(100, 3),
            act(),
        )
        
    def forward(self, x):
        out = self.body(x)
        out = out.view(out.size(0), -1)
        return out
    
net = FcNet().to(device)
    
net.apply(weights_init)
criterion = cross_entropy_for_onehot

In [72]:
# process input flowers and compute gradient of batch
def batch_grad(flower_indices, verbose = 0):
    n = len(flower_indices)

    gt_dataset = []
    gt_labels = []
    for flower_index in flower_indices:
        gt_data = torch.tensor(dst.data[flower_index, :]).to(device)
        gt_data = gt_data.view(1, *gt_data.size())
        gt_dataset.append(gt_data)
        gt_label = torch.tensor(dst.target[flower_index]).to(device)
        gt_label = gt_label.view(1)
        gt_labels.append(gt_label)
        gt_onehot_label = label_to_onehot(gt_label, num_classes=3)

        # print out (data, label) and verify onehot
        if (verbose):
            print(f"gt_data: {gt_data}")
            print(f"gt_label: {gt_label}")
            print(f"gt_onehot_label: {gt_onehot_label}")
            print(f"flower {flower_index} has label (gt, onehot) = ({gt_label.item()}, {torch.argmax(gt_onehot_label, dim=-1).item()})")

        # compute original gradient 
        out = net(gt_data.float())
        y = criterion(out, gt_onehot_label)

        if (flower_index == flower_indices[0]):
          batch_dy_dx = torch.autograd.grad(y, net.parameters())
        else:
          batch_dy_dx = tuple(map(sum, zip(batch_dy_dx, torch.autograd.grad(y, net.parameters())))) # sum of gradients

    batch_dy_dx = tuple(part/n for part in batch_dy_dx)
    original_dy_dx = list((_.detach().clone() for _ in batch_dy_dx)) # share the gradients with other clients

    # verifying dy_dx is average of list of flowers
    if (verbose >= 2):
      print(original_dy_dx)
    
    return original_dy_dx, gt_dataset, gt_labels

In [73]:
# DLG algorithm on a given set of flowers and returns the hypothesized input
def batch_DLG(original_dy_dx, n, gt_data_len, gt_onehot_label_len, verbose = 0):

    # identify (data, label) using Adam on the squared difference between the original and guessed gradient
    dummy_data = [torch.randn(gt_data_len).to(device).requires_grad_(True) for i in range(n)]
    dummy_label = [torch.randn(gt_onehot_label_len).to(device).requires_grad_(True) for i in range(n)]
    optimizer = torch.optim.LBFGS(dummy_data+dummy_label)

    global opt_steps; opt_steps = 0
    for iters in range(100):

        # closure function needed for LBFGS optimizer
        def closure():
            global opt_steps; opt_steps += 1

            # compute gradient of dummy data/label
            optimizer.zero_grad()
            for i in range(n):
                pred = net(dummy_data[i]) 
                #print(f"prediction: {pred} from data: {dummy_data.data} and label: {dummy_label}") # uncomment to see optimization updates
                dummy_onehot_label = F.softmax(dummy_label[i], dim=-1)
                dummy_loss = criterion(pred, dummy_onehot_label) if (i == 0) else dummy_loss + criterion(pred, dummy_onehot_label)
            
            dummy_loss /= n
            dummy_dy_dx = torch.autograd.grad(dummy_loss, net.parameters(), create_graph=True)
            
            # compute loss function, i.e. the SE of the gradients
            grad_diff = 0
            grad_count = 0
            for gx, gy in zip(dummy_dy_dx, original_dy_dx):
                grad_diff += ((gx - gy) ** 2).sum()
                grad_count += gx.nelement()

            grad_diff.backward()
            return grad_diff
        
        # perform GD and log information
        optimizer.step(closure)
        current_loss = closure()
        if (verbose == 2):
            print('current loss: ', iters, "%.4f" % current_loss.item())
            print('dummy data: ', dummy_data)
            print('dummy labels: ', dummy_label)
        
        # if current_loss is small enough, then the model has 'converged'
        if (closure() < 1e-9):
            break
    
    return dummy_data, opt_steps

In [77]:
# verify batch-DLG manually
flower_indices = [87, 67, 34, 12, 100]
n = len(flower_indices)
original_dy_dx, gt_dataset, gt_labels = batch_grad(flower_indices, 0)
guess, steps = batch_DLG(original_dy_dx, n=len(flower_indices), gt_data_len=gt_dataset[0].size(), gt_onehot_label_len=label_to_onehot(gt_labels[0]).size(), verbose=1)
SE = sum([torch.sum((gt_dataset[i] - guess[i])**2).item() for i in range(n)])
print(f"Original data: {gt_dataset}")
print(f"Predicted data: {guess}")
print('(naive assignment SE, steps) = ', SE, steps)

Original data: [tensor([[6.3000, 2.3000, 4.4000, 1.3000]], device='cuda:0',
       dtype=torch.float64), tensor([[5.8000, 2.7000, 4.1000, 1.0000]], device='cuda:0',
       dtype=torch.float64), tensor([[4.9000, 3.1000, 1.5000, 0.2000]], device='cuda:0',
       dtype=torch.float64), tensor([[4.8000, 3.0000, 1.4000, 0.1000]], device='cuda:0',
       dtype=torch.float64), tensor([[6.3000, 3.3000, 6.0000, 2.5000]], device='cuda:0',
       dtype=torch.float64)]
Predicted data: [tensor([[5.8422, 2.6845, 4.1353, 0.9543]], device='cuda:0', requires_grad=True), tensor([[4.7462, 2.9267, 1.3718, 0.0624]], device='cuda:0', requires_grad=True), tensor([[6.2708, 2.3105, 4.3732, 1.3411]], device='cuda:0', requires_grad=True), tensor([[6.3229, 3.3103, 6.0374, 2.5194]], device='cuda:0', requires_grad=True), tensor([[4.9613, 3.1803, 1.5144, 0.2287]], device='cuda:0', requires_grad=True)]
(naive assignment SE, steps) =  78.95178247574894 432


In [75]:
# testing Batch-DLG on n random flowers
length = dst.data.shape[0]
perm = list(range(length))
shuffle(perm)
n = 8
flower_indices = perm[0:n]
print('flowers: ', flower_indices)

original_dy_dx, gt_dataset, gt_labels = batch_grad(flower_indices, 0)
guess, steps = batch_DLG(original_dy_dx, n, gt_dataset[0].size(), label_to_onehot(gt_labels[0]).size(), 1)
print(f"Original data: {gt_dataset}")
print(f"Predicted data: {guess}")
print('steps: ', steps)

# brute force best assignment - if larger n is needed i.e. n >= 9, then use network flow
best_perm = tuple(range(0, n))
best_SE = sum([torch.sum((gt_dataset[i] - guess[i])**2).item() for i in range(n)])

for permutation in permutations(range(0, n)):
    SE = 0
    for i in range(n):
        SE += torch.sum((gt_dataset[i] - guess[permutation[i]])**2).item()
    if (SE < best_SE):
        best_SE = SE
        best_perm = permutation

print('best guessed to real data assignment: ', best_perm)
print('best SE: ', best_SE)
guess_perm = [None] * n
for i in range(n):
    guess_perm[i] = guess[best_perm[i]]
print('guessed permutation: ', guess_perm)

print('side by side comparison of guessed to actual input data: ')
for i in range(n):
    print(i, ':', gt_dataset[i].tolist(), guess_perm[i].tolist())

flowers:  [135, 41, 136, 145, 99, 7, 46, 129]
Original data: [tensor([[7.7000, 3.0000, 6.1000, 2.3000]], device='cuda:0',
       dtype=torch.float64), tensor([[4.5000, 2.3000, 1.3000, 0.3000]], device='cuda:0',
       dtype=torch.float64), tensor([[6.3000, 3.4000, 5.6000, 2.4000]], device='cuda:0',
       dtype=torch.float64), tensor([[6.7000, 3.0000, 5.2000, 2.3000]], device='cuda:0',
       dtype=torch.float64), tensor([[5.7000, 2.8000, 4.1000, 1.3000]], device='cuda:0',
       dtype=torch.float64), tensor([[5.0000, 3.4000, 1.5000, 0.2000]], device='cuda:0',
       dtype=torch.float64), tensor([[5.1000, 3.8000, 1.6000, 0.2000]], device='cuda:0',
       dtype=torch.float64), tensor([[7.2000, 3.0000, 5.8000, 1.6000]], device='cuda:0',
       dtype=torch.float64)]
Predicted data: [tensor([[5.1312, 3.8606, 1.5707, 0.3258]], device='cuda:0', requires_grad=True), tensor([[7.6203, 3.1238, 5.8060, 2.3110]], device='cuda:0', requires_grad=True), tensor([[4.9940, 3.3334, 1.5347, 0.0330]], devi