<a href="https://colab.research.google.com/github/mgozon/DLG-UROP/blob/main/Batch_DLG_Iris_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Batch-DLG - Iris Dataset
This notebook adapts the code from [Deep Leakage from Gradients](https://gist.github.com/Lyken17/91b81526a8245a028d4f85ccc9191884) to the Iris dataset. In addition, it explores whether the same procedure can still recover the inputs when only the gradient averaged over a batch of inputs is available.

In [133]:
# setting up libraries and device
%matplotlib inline

import numpy as np
from pprint import pprint

from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import grad

from random import randint
from random import shuffle

from sklearn.datasets import load_iris
dst = load_iris()  # Iris dataset; later cells index dst.data and dst.target by flower index

print(torch.__version__)
# Use the GPU whenever CUDA reports one, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Running on %s" % device)

1.12.1+cu113
Running on cuda


In [134]:
# auxiliary functions for NN - convert to onehot and loss function
def label_to_onehot(target, num_classes = 3):
    """Expand a 1-D tensor of class indices into a one-hot matrix.

    The result has one row per entry of ``target`` and ``num_classes`` columns,
    lives on the same device as ``target`` and holds a 1 in the column named by
    each index and 0 elsewhere.
    """
    column = target.unsqueeze(1)  # scatter_ needs an index tensor shaped (rows, 1)
    onehot = torch.zeros(column.size(0), num_classes, device=column.device)
    return onehot.scatter_(1, column, 1)

def cross_entropy_for_onehot(pred, target):
    """Mean cross-entropy between raw scores ``pred`` and (soft) one-hot ``target``.

    ``pred`` is passed through a log-softmax over its last dimension; the
    per-row loss is summed over dimension 1 and averaged over all rows.
    """
    log_probs = F.log_softmax(pred, dim=-1)
    per_sample = (-target * log_probs).sum(dim=1)
    return per_sample.mean()

In [135]:
# a random fully connected neural network with random weights and biases
def weights_init(m):
    """Re-initialise a module's ``weight`` and ``bias`` uniformly in [-0.5, 0.5).

    Written to be passed to ``nn.Module.apply``. Modules that lack the
    attribute, or expose it as ``None`` (for example ``nn.Linear(..., bias=False)``),
    are left untouched instead of raising ``AttributeError``.
    """
    for name in ("weight", "bias"):
        param = getattr(m, name, None)
        # hasattr() is not sufficient: a disabled parameter is still present, but set to None.
        if param is not None:
            param.data.uniform_(-0.5, 0.5)
    
class FcNet(nn.Module):
    """Fully connected network: 4 inputs, three hidden layers of 100 units, 3 outputs.

    Every linear layer, including the last one, is followed by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        widths = (4, 100, 100, 100, 3)
        layers = []
        # Pair consecutive widths into Linear layers, each followed by its activation.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.Sigmoid())
        self.body = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and flatten everything after dim 0."""
        activations = self.body(x)
        return activations.view(activations.size(0), -1)
    
# Build the model on the selected device and overwrite its default initialisation
# with weights_init (both .to() and .apply() return the module itself).
net = FcNet().to(device).apply(weights_init)
criterion = cross_entropy_for_onehot

In [142]:
# DLG algorithm on a batch of flowers; returns the hypothesized inputs
def Batch_DLG(flower_indices, verbose = 0, max_iters = 10, tol = 1e-9):
    """Try to reconstruct a batch of Iris samples from their averaged gradient.

    The gradient of ``criterion(net(x), onehot(y))`` with respect to
    ``net.parameters()`` is computed for every requested flower and averaged.
    Random dummy inputs and labels are then optimised with L-BFGS, one dummy
    sample per outer round (round-robin over the batch), so that the gradient
    they induce matches the averaged gradient in summed squared error.

    Args:
        flower_indices: non-empty sequence of row indices into ``dst.data`` /
            ``dst.target``. Repeated indices are allowed.
        verbose: 0 prints nothing; any truthy value prints the final
            comparison; 2 additionally prints every sample and every round.
        max_iters: number of outer L-BFGS rounds.
        tol: stop early once the gradient-matching loss drops below this value.

    Returns:
        ``(dummy_data, squared_error, opt_steps)``: the list of reconstructed
        input tensors, the squared error between them and the true inputs
        summed over the batch, and the number of closure evaluations (this
        includes the one extra evaluation made after each round to read the
        current loss).

    Raises:
        ValueError: if ``flower_indices`` is empty.
    """
    n = len(flower_indices)
    if n == 0:
        raise ValueError("flower_indices must contain at least one flower index")

    gt_dataset = []
    gt_labels = []
    batch_dy_dx = None
    for flower_index in flower_indices:
        gt_data = torch.tensor(dst.data[flower_index, :]).to(device)
        gt_data = gt_data.view(1, *gt_data.size())
        gt_dataset.append(gt_data)
        # scatter_ in label_to_onehot needs int64 indices regardless of the array's native int type
        gt_label = torch.tensor(dst.target[flower_index], dtype=torch.long).to(device)
        gt_label = gt_label.view(1)
        gt_labels.append(gt_label)
        gt_onehot_label = label_to_onehot(gt_label, num_classes=3)

        # print out (data, label) and verify onehot
        if (verbose == 2):
            print(f"gt_data: {gt_data}")
            print(f"gt_label: {gt_label}")
            print(f"gt_onehot_label: {gt_onehot_label}")
            print(f"flower {flower_index} has label (gt, onehot) = ({gt_label.item()}, {torch.argmax(gt_onehot_label, dim=-1).item()})")

        # compute original gradient
        out = net(gt_data.float())
        y = criterion(out, gt_onehot_label)
        sample_dy_dx = torch.autograd.grad(y, net.parameters())

        # Accumulate by "has anything been stored yet" rather than by comparing
        # index values, so a repeated first index cannot reset the running sum.
        if batch_dy_dx is None:
            batch_dy_dx = sample_dy_dx
        else:
            batch_dy_dx = tuple(acc + g for acc, g in zip(batch_dy_dx, sample_dy_dx))

    # share the (averaged) gradients with other clients
    original_dy_dx = [(part / n).detach().clone() for part in batch_dy_dx]

    # generate dummy data and label
    dummy_data = [torch.randn(gt_data.size()).to(device).requires_grad_(True) for i in range(n)]
    dummy_label = [torch.randn(gt_onehot_label.size()).to(device).requires_grad_(True) for i in range(n)]

    # identify (data, label) using LBFGS on the squared difference between the original and guessed gradient
    steps_taken = 0
    for iters in range(max_iters):
        idx = iters % n
        optimizer = torch.optim.LBFGS([dummy_data[idx], dummy_label[idx]]) # only update one dummy variable at a time
        for i in range(n):
            active = (i == idx)
            dummy_data[i].requires_grad_(active)
            dummy_label[i].requires_grad_(active)

        def closure():
            nonlocal steps_taken
            steps_taken += 1
            optimizer.zero_grad()

            # mean loss of the dummy batch
            dummy_loss = sum(
                criterion(net(dummy_data[i]), F.softmax(dummy_label[i], dim=-1))
                for i in range(n)
            ) / n

            dummy_dy_dx = torch.autograd.grad(dummy_loss, net.parameters(), create_graph=True)

            grad_diff = sum(((gx - gy) ** 2).sum() for gx, gy in zip(dummy_dy_dx, original_dy_dx))

            # Only the active dummy tensors need gradients; restricting the backward
            # pass keeps it from accumulating into net's own parameter .grad fields.
            grad_diff.backward(inputs=[dummy_data[idx], dummy_label[idx]])

            return grad_diff

        optimizer.step(closure)
        current_loss = closure()  # re-evaluate to obtain the loss after the update
        if (verbose == 2):
            print(iters, "%.4f" % current_loss.item())
            print('dummy data: ', [dummy_data[i].tolist() for i in range(n)])
            print('dummy labels: ', [dummy_label[i].tolist() for i in range(n)])

        # if current_loss is small enough, then the model has 'converged'
        if (current_loss < tol):
            break

    # Mirror the counter into the module-level name the original code maintained,
    # so cells that read `opt_steps` after a call keep working.
    global opt_steps
    opt_steps = steps_taken

    # compare results
    if (verbose):
        print(f"Original data: {gt_dataset}")
        print(f"Predicted data: {dummy_data}")
        print(f"Original label: {gt_labels}")

    squared_error = sum(torch.sum((gt_dataset[i] - dummy_data[i])**2).item() for i in range(n))
    return dummy_data, squared_error, steps_taken

In [146]:
# testing Batch-DLG on n random flowers: shuffle all row indices and attack the first n
n = 1
length = dst.data.shape[0]
perm = list(range(length))
shuffle(perm)
print('flowers: ', perm[:n])

guess, SE, steps = Batch_DLG(perm[:n], verbose=1)
print('guess: ', [guess[i].tolist() for i in range(n)])
print('SE: ', SE)
print('steps: ', steps)


flowers:  [47]
Original data: [tensor([[4.6000, 3.2000, 1.4000, 0.2000]], device='cuda:0',
       dtype=torch.float64)]
Predicted data: [tensor([[-18.4930,   5.9030,  17.4342, -48.2738]], device='cuda:0',
       requires_grad=True)]
Original label: [tensor([0], device='cuda:0')]
guess:  [[[-18.49297332763672, 5.903026103973389, 17.43424415588379, -48.27377700805664]]]
SE:  3147.395810306997
steps:  103
