# Transferability Metrics

TJ Kim <br/>
2/4/21

Updated <br/>
2/21/21

#### Objective: 
Add measurements of transferability between different networks to the existing (and functioning) FGSM attack.
These measurements also give us a basis for comparing the networks.

#### Transferability Metrics
- Size of input gradient
- Gradient Alignment
- Variance of Loss

#### Other Metrics
- Confidence of attack (Loss towards target)
- L2 Distance from original point


In [1]:
# IPython `cd` magic: switch the kernel's working directory to the project folder
cd '/home/ubuntu/FedAtk/' 

/home/ubuntu/FedAtk


### Load Relevant Libraries and Modules

Load the relevant libraries for the federated learning code.

In [2]:
import torch

# Personal NN
from transfer_attacks.Personalized_NN import *

# Import the Rest

### Metric Callers

This will go inside a new PY file that calls individual metric calculations.

In [61]:
import copy
import torch

def calcNN_variance(network, data_x, data_y):
    """
    Variance of the per-sample NLL loss of ``network`` (surrogate) over a batch.

    network - callable pytorch nn module; switched to eval mode here
    data_x  - batch of inputs passed to ``network``
    data_y  - target class indices for ``torch.nn.NLLLoss``

    Returns a 0-dim tensor holding E[L^2] - (E[L])^2, where L is the
    unreduced loss of each sample in the batch.
    """
    network.eval()

    # reduction='none' keeps one loss value per sample
    per_sample_loss = torch.nn.NLLLoss(reduction='none')(network(data_x), data_y)

    second_moment = torch.mean(per_sample_loss * per_sample_loss)
    first_moment = torch.mean(per_sample_loss)

    return second_moment - first_moment * first_moment

def calcNN_alignment(network1, network2, data_x, data_y):
    """
    Take in surrogate and victim pytorch nn module, as well as dataloader tensor inputs,
    and compare the two networks' input gradients sample by sample.

    network1, network2 - modules exposing ``forward`` and ``criterion``; the
                         loss ``criterion(forward(x), data_y)`` must be a scalar
                         so it can be back-propagated to the inputs
    data_x - input batch indexed as (batch, channel, H, W); only channel 0 of
             the input gradient is used
    data_y - labels handed to each network's ``criterion``

    Returns a tensor of shape (batch, W, W), allocated with ``torch.zeros``
    (default dtype/device). Entry i is
        G1_i^T @ G2_i / (||G1_i||_2 * ||G2_i||_2)
    where G1_i / G2_i are the (H, W) channel-0 input gradients of sample i for
    network1 / network2. The trace of each entry equals the cosine similarity
    of the two flattened gradients. If a gradient is exactly zero, the
    division by zero produces non-finite entries for that sample.
    """
    network1.eval()
    network2.eval()
    network1.zero_grad()
    network2.zero_grad()

    # Fresh leaf copies of the input so each network gets its own input gradient
    # (data_x itself is left untouched and its .grad is never written).
    x_adv1 = data_x.detach().requires_grad_(True)
    cost1 = network1.criterion(network1.forward(x_adv1), data_y)

    x_adv2 = data_x.detach().requires_grad_(True)
    cost2 = network2.criterion(network2.forward(x_adv2), data_y)

    cost1.backward()
    cost2.backward()

    # Channel-0 input gradients, shape (batch, H, W)
    grad1 = x_adv1.grad[:, 0, :, :]
    grad2 = x_adv2.grad[:, 0, :, :]

    # Batched G1^T @ G2 -> (batch, W, W)
    num = torch.bmm(grad1.transpose(1, 2), grad2)

    # Product of the per-sample 2-norms taken over all H*W entries, shape (batch,)
    den = torch.norm(grad1, p=2, dim=(1, 2)) * torch.norm(grad2, p=2, dim=(1, 2))

    # Copy into a torch.zeros buffer so the result keeps the default
    # dtype/device regardless of where the gradients live.
    temp = torch.zeros(num.shape)
    temp.copy_(num / den.view(-1, 1, 1))

    return temp

def calcNN_ingrad(network, data_x, data_y, norm=2):
    """
    Take in pytorch nn module (victim)
    and data tensor to obtain size of input gradient

    network - module exposing ``forward`` and ``criterion``; the loss
              ``criterion(forward(x), data_y)`` must be a scalar
    data_x  - input batch indexed as (batch, channel, H, W); only channel 0 of
              the input gradient is measured
    data_y  - labels handed to ``network.criterion``
    norm    - ``p`` argument forwarded to ``torch.norm`` (default 2)

    Returns a 0-dim tensor (default dtype/device): the mean over the batch of
    the per-sample norm of the channel-0 input gradient.
    """
    network.eval()

    # Fresh leaf copy of the input so the gradient lands on x_adv, not data_x
    x_adv = data_x.detach().requires_grad_(True)
    cost = network.criterion(network.forward(x_adv), data_y)

    network.zero_grad()
    cost.backward()

    # One norm per sample, taken over all H*W entries of the channel-0 gradient
    per_sample = torch.norm(x_adv.grad[:, 0, :, :], p=norm, dim=(1, 2))

    # Copy into a torch.zeros buffer so the result keeps the default
    # dtype/device regardless of where the gradient lives.
    temp = torch.zeros(per_sample.shape)
    temp.copy_(per_sample)

    # Find mean of Norms
    return torch.mean(temp)
    

### Transferer

Copy existing transferer here and work on calling the metric calculations.

In [4]:
import yaml

# Import Custom Made Victim
from transfer_attacks.Personalized_NN import *
from transfer_attacks.Params import *
            
class Transferer():
    """
    - Collect all the FL NN
    - Implement transfer attack sweep
    - Hold all the metrics of interest

    Call order used in this notebook: generate_advNN -> generate_victims ->
    generate_xadv -> send_to_victims -> check_empirical_metrics.
    """

    def __init__(self, filename:str, config_name = None):
        """
        Read the experiment config and create empty holders for all results.

        filename    - stored as self.file and passed as ``direc`` to load_FLNN
        config_name - currently unused; the config is always read from
                      'configs/config.yaml'
        """

        # TO IMPLEMENT - Overwrite current file with config_name
        with open(r'configs/config.yaml') as file:
            self.config = yaml.load(file, Loader=yaml.FullLoader)

        self.file = filename

        # Matrix to Record Performance (Old Metrics) - dicts keyed by victim NN id
        self.orig_acc_transfers = {}
        self.orig_similarities = {}
        self.orig_target_hit = {}
        self.adv_acc_transfers = {}
        self.adv_similarities = {}
        self.adv_target_hit = {}

        # Matrix to Record Performance (New Metrics - theoretical)

        # Attack Params
        self.ifsgm_params = IFSGM_Params()
        self.cw_params = CW_Params()

        # Other Params
        self.advNN_idx = None # int
        self.advNN = None # pytorch nn
        self.victim_idxs = None # List of ints
        self.victims = None # dict of pytorch nn

        # Recorded Data Points
        self.x_orig = None
        self.y_orig = None
        self.y_true = None
        self.x_adv = None
        self.y_adv = None

        # Transferability Metrics
        self.metric_variance = None # Single value
        self.metric_alignment = {} # Dict - key is victim NN id
        self.metric_ingrad = {} # Dict - key is victim NN id

    def generate_advNN(self, client_idx):
        """
        Select specific client to load neural network to
        Load the data for that client
        Load the weights for that client
        This is the client that will generate perturbations
        """

        # Import Data Loader for this FL set
        file_indices = list(range(self.config['num_sets']))
        client_slice = len(file_indices) // self.config['num_clients']

        # Import the loader for this dataset only
        # NOTE(review): the upper bound is capped at the hard-coded value 35 -
        # presumably the number of available data files; confirm against the config.
        slice_start = client_idx * client_slice
        slice_end = min((client_idx + 1) * client_slice, 35)
        self.loader = Dataloader(file_indices, [slice_start, slice_end])
        self.loader.load_training_dataset()
        self.loader.load_testing_dataset()

        self.advNN_idx = client_idx
        self.advNN = load_FLNN(idx=client_idx, direc=self.file, loader=self.loader)

    def generate_xadv(self, atk_type = "IFSGM"):
        """
        Generate perturbed images
        atk_type - "IFSGM" or "CW" (all-lowercase spellings are accepted too);
                   any other value prints a notice and runs IFSGM
        """

        if atk_type in ("IFSGM", "ifsgm"):
            self.advNN.i_fgsm(self.ifsgm_params)
        elif atk_type in ("CW", "cw"):
            self.advNN.CW_attack(self.cw_params)
        else:
            print("Attak type unidentified -- Running IFSGM")
            self.advNN.i_fgsm(self.ifsgm_params)

        # Record relevant tensors
        self.x_orig = self.advNN.x_orig
        self.y_orig = self.advNN.output_orig
        self.y_true = self.advNN.y_orig
        self.x_adv = self.advNN.x_adv
        self.y_adv = self.advNN.output_adv

    def generate_victims(self, client_idxs):
        """
        Load the pre-trained other clients in the system

        client_idxs - list of client indices; self.victims becomes a dict
                      keyed by these indices
        """

        self.victim_idxs = client_idxs
        self.victims = {
            idx: load_FLNN(idx=idx, direc=self.file, loader=None)
            for idx in self.victim_idxs
        }

    def send_to_victims(self, client_idxs):
        """
        Send pre-generated adversarial perturbations
        client_idxs - list of indices of clients we want to attack (just victims)

        Then record the attack success stats accordingly
        """

        for i in client_idxs:
            victim = self.victims[i]
            # The target class is always taken from the IFSGM params, whichever
            # attack produced x_adv.
            victim.forward_transfer(self.x_orig,self.x_adv,
                                    self.y_orig,self.y_adv,
                                    self.y_true, self.ifsgm_params.target,
                                    print_info=False)

            # Record Performance
            self.orig_acc_transfers[i] = victim.orig_test_acc
            self.orig_similarities[i] = victim.orig_output_sim
            self.orig_target_hit[i] = victim.orig_target_achieve

            self.adv_acc_transfers[i] = victim.adv_test_acc
            self.adv_similarities[i] = victim.adv_output_sim
            self.adv_target_hit[i] = victim.adv_target_achieve

    def check_empirical_metrics(self, orig_flag = True, batch_size = 1000):
        """
        Computes the following for the following models:
        - Size of input gradient - across data distribution across all victim NN
        - Gradient Alignment - Between the surrogate and each of the victim NN
        - Variance of loss - Just for the surrogate

        - Orig flag false uses new fresh data as inputs instead of xorig and yorig
          (used to attack victims)
        - batch_size is only used when orig_flag is false

        Results are stored in self.metric_variance, self.metric_alignment and
        self.metric_ingrad; the two dicts are keyed by victim NN id.
        """

        # Load a Sample of data from the dataloader
        if not orig_flag:
            image_data = self.advNN.dataloader.load_batch(batch_size)
            data_x  = torch.Tensor(image_data['input']).reshape(batch_size,1,28,28)
            data_y = torch.Tensor(image_data['label']).type(torch.LongTensor)

            if torch.cuda.is_available():
                # Keep inputs and labels on the same device
                data_x = data_x.cuda()
                data_y = data_y.cuda()
        else:
            data_x = self.x_orig
            # NOTE(review): y_orig holds advNN.output_orig (see generate_xadv);
            # if the true labels are intended here, self.y_true is the one - confirm.
            data_y = self.y_orig

        self.metric_variance = calcNN_variance(self.advNN, data_x, data_y)

        # Iterate over the actual victim ids: self.victims is a dict keyed by
        # client index, which need not be 0..n-1.
        for idx, victim in self.victims.items():
            self.metric_alignment[idx] = calcNN_alignment(self.advNN, victim, data_x, data_y)
            self.metric_ingrad[idx] = calcNN_ingrad(victim, data_x, data_y)

### Load and Run the System

In [20]:
client_idx = 0
victim_idxs = [0,1,2,3,4,5,6,7]

# Generate NN and Victims
# The transferer has to be created and its surrogate loaded in this cell;
# otherwise `transferer` is undefined when the notebook runs on a fresh kernel.
transferer = Transferer(filename = 'exp2_neck2_head3')
transferer.generate_advNN(client_idx = client_idx)
transferer.generate_victims(client_idxs = victim_idxs)

In [21]:
# Craft adversarial inputs on the surrogate, then replay them on every victim
transferer.generate_xadv(atk_type="ifsgm")
transferer.send_to_victims(client_idxs=victim_idxs)

In [60]:
# Run the empirical metrics
transferer.check_empirical_metrics(orig_flag = True, batch_size = 10)
print(transferer.metric_variance)
print(transferer.metric_ingrad)

tensor(0.0588, device='cuda:0', grad_fn=<SubBackward0>)
{0: tensor([6.5118e-02, 3.7973e+00, 1.4780e-01, 2.4235e+00, 3.7102e-01, 1.4618e-03,
        1.5876e-02, 2.8617e-02, 9.4379e-06, 1.6888e-01]), 1: tensor([7.8913e-02, 8.7229e+00, 8.0429e-02, 2.7824e+00, 1.4411e+00, 2.0141e-02,
        8.7402e-02, 4.8293e-02, 2.5223e-07, 8.7618e-01]), 2: tensor([2.9426e-02, 8.9910e+00, 1.1845e-01, 6.5815e+00, 1.1847e+00, 4.4809e-02,
        6.8365e-02, 1.6138e-01, 2.5911e-05, 7.0737e-02]), 3: tensor([2.4195e-01, 3.2880e+00, 6.4686e-01, 1.1122e+00, 1.9353e-02, 2.8432e-02,
        3.4569e-02, 3.6026e-02, 9.2752e-07, 1.1626e-01]), 4: tensor([1.4387e-01, 9.4165e+00, 6.8067e-02, 6.7760e+00, 1.1161e+00, 1.9865e-02,
        1.1960e-01, 1.8354e+00, 1.1989e-06, 2.7010e-01]), 5: tensor([1.7159e-01, 9.2250e+00, 5.3784e-01, 6.6097e+00, 1.4966e+00, 1.1977e-01,
        2.3251e-01, 4.4481e-02, 6.1137e-06, 6.3884e-01]), 6: tensor([1.6446e-02, 4.1397e+00, 1.7222e-01, 2.4781e+00, 2.2540e+00, 2.1469e-02,
        1.0356

In [23]:
# Predicted class of every victim on the clean inputs
for victim_id in range(8):
    victim_net = transferer.victims[victim_id]
    clean_predictions = torch.argmax(victim_net(transferer.x_orig), dim=1)
    print(clean_predictions)

tensor([ 9,  6,  1, 36, 36,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  4,  1,  4, 36,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  4,  1,  4, 36,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  6,  1, 36, 36,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  4,  1,  4, 36,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  4,  1,  4, 36,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  6,  1,  0,  2,  1,  1, 39,  4,  5], device='cuda:0')
tensor([ 9,  4, 47, 36, 36,  1,  1, 39,  4,  5], device='cuda:0')


In [24]:
# Predicted class of every victim on the adversarial inputs
for victim_id in range(8):
    victim_net = transferer.victims[victim_id]
    adv_predictions = torch.argmax(victim_net(transferer.x_adv), dim=1)
    print(adv_predictions)

tensor([20, 20, 20, 20, 20, 20, 20, 20, 20, 20], device='cuda:0')
tensor([20, 20, 20, 20, 20, 20, 20, 23, 20,  5], device='cuda:0')
tensor([20, 20, 20, 46, 46, 20, 46, 17,  4, 20], device='cuda:0')
tensor([20, 20, 20, 20, 20, 20, 20, 23, 20, 20], device='cuda:0')
tensor([20, 20, 20, 20, 20, 20, 20,  6, 20,  5], device='cuda:0')
tensor([20, 20, 20,  4, 20, 20, 20, 46,  4,  5], device='cuda:0')
tensor([20, 20, 20, 23, 20, 20, 20, 30, 20,  5], device='cuda:0')
tensor([ 4, 20, 20, 27, 20, 20, 20, 23,  4, 20], device='cuda:0')


In [25]:
# Label / result-dict pairs, in the order they are printed for each victim
recorded_stats = [
    ("orig_acc_transfers: ", transferer.orig_acc_transfers),
    ("orig_similarities: ", transferer.orig_similarities),
    ("orig_target_hit:", transferer.orig_target_hit),
    ("adv_acc_transfers:", transferer.adv_acc_transfers),
    ("adv_similarities:", transferer.adv_similarities),
    ("adv_target_hit:", transferer.adv_target_hit),
]

for nn_id in range(8):
    print("\n NN", nn_id)
    for label, per_victim in recorded_stats:
        print(label, per_victim[nn_id])


 NN 0
orig_acc_transfers:  tensor(0.8000, device='cuda:0')
orig_similarities:  tensor(1., device='cuda:0')
orig_target_hit: tensor(0., device='cuda:0')
adv_acc_transfers: tensor(0., device='cuda:0')
adv_similarities: tensor(1., device='cuda:0')
adv_target_hit: tensor(1., device='cuda:0')

 NN 1
orig_acc_transfers:  tensor(0.7000, device='cuda:0')
orig_similarities:  tensor(0.8000, device='cuda:0')
orig_target_hit: tensor(0., device='cuda:0')
adv_acc_transfers: tensor(0.1000, device='cuda:0')
adv_similarities: tensor(0.8000, device='cuda:0')
adv_target_hit: tensor(0.8000, device='cuda:0')

 NN 2
orig_acc_transfers:  tensor(0.7000, device='cuda:0')
orig_similarities:  tensor(0.8000, device='cuda:0')
orig_target_hit: tensor(0., device='cuda:0')
adv_acc_transfers: tensor(0.1000, device='cuda:0')
adv_similarities: tensor(0.5000, device='cuda:0')
adv_target_hit: tensor(0.5000, device='cuda:0')

 NN 3
orig_acc_transfers:  tensor(0.8000, device='cuda:0')
orig_similarities:  tensor(1., device=