# Inter-Boundary Distance Measurement
TJ Kim

11.9.21

### Summary:
- Given multiple models performing an identical task and a shared test set, measure the inter-boundary distance to quantify transferability between models.
- Legitimate direction - from any point x toward the closest data point in the test set that changes the class
- Adversarial direction - from any point x along the smallest perturbation that changes the class
- Random direction - from any point x along a random perturbation, drawn uniformly, that causes misclassification

In [1]:
cd /home/ubuntu/FedEM/

### Import Libraries

In [2]:
# Import General Libraries
import os
import argparse
import torch
import copy
import pickle
import random
import numpy as np
import pandas as pd

# Import FedEM based Libraries
from utils.utils import *
from utils.constants import *
from utils.args import *
from torch.utils.tensorboard import SummaryWriter
from run_experiment import *
from models import *

# Import Transfer Attack
from transfer_attacks.Personalized_NN import *
from transfer_attacks.Params import *
from transfer_attacks.Transferer import *
from transfer_attacks.Args import *

In [3]:
# Boundary Transferer Imports
# Import Custom Made Victim
from transfer_attacks.Personalized_NN import *
from transfer_attacks.Params import *
from transfer_attacks.Attack_Metrics import *
import pandas as pd
from transfer_attacks.Transferer import *


### Load Models and Data

In [4]:
# Build the experiment configuration by hand instead of parsing a command line
args_ = Args()

# Attribute name -> value, applied to args_ in the order listed
_manual_settings = {
    "experiment": "cifar10",
    "method": "FedEM",
    "decentralized": False,
    "sampling_rate": 1.0,
    "input_dimension": None,
    "output_dimension": None,
    "n_learners": 3,
    "n_rounds": 10,
    "bz": 128,
    "local_steps": 1,
    "lr_lambda": 0,
    "lr": 0.03,
    "lr_scheduler": 'multi_step',
    "log_freq": 10,
    "device": 'cuda',
    "optimizer": 'sgd',
    "mu": 0,
    "communication_probability": 0.1,
    "q": 1,
    "locally_tune_clients": False,
    "seed": 1234,
    "verbose": 1,
    "save_path": 'weights/cifar/21_09_28_first_transfers/',
    "validation": False,
}

for _name, _value in _manual_settings.items():
    setattr(args_, _name, _value)

# Build the aggregator and its clients from the configuration above
# (weights are loaded from args_.save_path in a later cell)
aggregator, clients = dummy_aggregator(args_)

==> Clients initialization..
===> Building data iterators..


100%|██████████████████████████████████████████| 80/80 [00:00<00:00, 171.01it/s]


===> Initializing clients..


100%|███████████████████████████████████████████| 80/80 [00:31<00:00,  2.51it/s]


==> Test Clients initialization..
===> Building data iterators..


0it [00:00, ?it/s]


===> Initializing clients..


0it [00:00, ?it/s]


++++++++++++++++++++++++++++++
Global..
Train Loss: 2.292 | Train Acc: 12.159% |Test Loss: 2.292 | Test Acc: 12.248% |
++++++++++++++++++++++++++++++++++++++++++++++++++
################################################################################


In [5]:
# Import weights for aggregator
aggregator.load_state(args_.save_path)

# This is where the models are stored -- one for each mixture --> learner.model for nn
hypotheses = aggregator.global_learners_ensemble.learners

# Obtain the state dict of every hypothesis
weights_h = [h.model.state_dict() for h in hypotheses]

# Per-client mixture weights are read from the same directory as the
# checkpoints, so derive the path from args_.save_path instead of repeating it
weights = np.load(os.path.join(args_.save_path, "train_client_weights.npy"))
np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})

# Mixture weights of the first num_models clients
num_models = 5
model_weights = [weights[i] for i in range(num_models)]

# Generate the models to test on as linear combinations of the hypotheses,
# one model per row of mixture weights
models_test = []

for mixture in model_weights:
    # One coefficient is required per hypothesis (any number of learners)
    if len(mixture) != len(weights_h):
        raise ValueError(
            "expected {} mixture weights per model, got {}".format(
                len(weights_h), len(mixture)
            )
        )

    # First make a model with the right architecture, then overwrite its weights
    new_model = copy.deepcopy(hypotheses[0].model)
    new_model.eval()
    new_weight_dict = copy.deepcopy(weights_h[0])
    for key in weights_h[0]:
        # Weighted sum of this entry across all hypotheses
        new_weight_dict[key] = sum(
            w * state[key] for w, state in zip(mixture, weights_h)
        )
    new_model.load_state_dict(new_weight_dict)
    models_test.append(new_model)

In [6]:
# Pool the validation samples of every client into a single test set
val_inputs, val_labels = [], []

for client in clients:
    # Each dataset item unpacks as (input, label, index); the index is not used
    for sample_x, sample_y, _ in client.val_iterator.dataset:
        val_inputs.append(sample_x)
        val_labels.append(sample_y)

data_x = torch.stack(val_inputs)
data_y = torch.stack(val_labels)

# Wrap the pooled tensors in the custom dataloader, which allows for diverse batch sizes
dataloader = Custom_Dataloader(data_x, data_y)

### Boundary Transferer Class
- Used to calculate direction of all 3 types
- Based off of (not subclass of) the Transferer class

In [249]:
class Boundary_Transferer():
    """
    Measure boundary directions from a fixed data point.

    - Holds a list of models and a dataloader shared by all measurements
    - Currently computes the legitimate direction: from a fixed point toward the
      closest comparison point that a model assigns to a different class
    """

    def __init__(self, models_list, dataloader):
        """
        models_list: indexable collection of models; a model is called on a batch
                     of inputs and its output is arg-maxed over dim 1
        dataloader:  object providing select_single() -> (idx, x, y) and
                     load_batch(batch_size) -> (xs, ys)
        """

        self.models_list = models_list
        self.dataloader = dataloader

        # Index into models_list of the model used for classification; when left
        # as None, legitimate_direction falls back to the first entry of NN_idxs
        self.base_nn_idx = None
        self.victim_idx = None  # not used by the methods in this class yet

        self.fixed_point = None # {"idx","x","y"}
        self.comparison_set = None # For legitimate direction
        self.comparison_x = None
        self.comparison_y = None

    def select_data_point(self):
        """
        Select a single data point to use as comparison of different boundary types
        """
        idx, x, y = self.dataloader.select_single()
        self.fixed_point = {"idx": idx, "x": x, "y": y}

        return

    def select_comparison_set(self, batch_size):
        """
        Select multiple datapoints to compare the fixed point against

        batch_size: number of points requested from the dataloader
        """

        xs, ys = self.dataloader.load_batch(batch_size)

        self.comparison_x = xs
        self.comparison_y = ys

        # Per-sample view of the batch, keyed by position in the batch
        self.comparison_set = {
            i: {"x": x, "y": y} for i, (x, y) in enumerate(zip(xs, ys))
        }

        return

    def measure_distance(self, x1, x2_set):
        """
        Difference and L2 distance between one point and a batch of points

        x1:     reference point (must broadcast against x2_set)
        x2_set: batch of points, first dimension is the batch

        Returns (x2_set - x1, per-sample L2 norm of that difference)
        """

        x2_dist = torch.subtract(x2_set, x1)
        # Flatten everything but the batch dimension so each sample yields one norm
        x2_l2 = torch.linalg.norm(x2_dist.flatten(start_dim=1), ord=2, dim=1)

        return x2_dist, x2_l2

    def _resolve_model_idx(self, NN_idxs):
        """Return the index of the classifying model: base_nn_idx if set, else NN_idxs[0]."""
        if self.base_nn_idx is not None:
            return self.base_nn_idx
        if NN_idxs is None or len(NN_idxs) == 0:
            raise ValueError("base_nn_idx is not set and NN_idxs is empty")
        return NN_idxs[0]

    def legitimate_direction(self, batch_size, NN_idxs, new_point = True):
        """
        Calculate Legitimate Direction for a single point

        batch_size: size of the comparison set drawn from the dataloader
        NN_idxs:    model indices; the first entry is used when base_nn_idx is
                    None (further entries are currently unused)
        new_point:  draw a new fixed point; when False the previous one is
                    reused (a point is still drawn if none was selected yet)

        Returns (direction, unit_direction): the difference from the fixed point
        to the closest comparison point classified differently from the fixed
        point's label, and that difference scaled to unit L2 norm.

        Raises ValueError if no comparison point qualifies or no model index
        can be determined.
        """

        # Select point of baseline comparison
        if new_point or self.fixed_point is None:
            self.select_data_point()

        # Select set of comparison
        self.select_comparison_set(batch_size)

        model_idx = self._resolve_model_idx(NN_idxs)

        # Calculate X distance
        x_dists, x_dists_l2 = self.measure_distance(self.fixed_point["x"], self.comparison_x)

        # Classify all members of the comparison set (inference only, no graph needed)
        with torch.no_grad():
            class_scores = self.models_list[model_idx](self.comparison_x)
        y_pred_nn = torch.argmax(class_scores, dim=1)

        # Candidates: predicted label differs from the fixed point's label.
        # Zero-distance points are excluded because they define no direction
        # and would produce a NaN unit vector.
        candidates = (y_pred_nn != self.fixed_point["y"]) & (x_dists_l2 > 0)
        if not bool(candidates.any()):
            raise ValueError(
                "no comparison point is classified differently from the fixed point"
            )

        # Non-candidates get infinite distance so argmin can never select them
        masked_l2 = torch.where(
            candidates, x_dists_l2, torch.full_like(x_dists_l2, float("inf"))
        )
        min_dist_idx = torch.argmin(masked_l2)

        min_dist_vector = x_dists[min_dist_idx]
        min_dist_unit_vector = torch.divide(min_dist_vector, x_dists_l2[min_dist_idx])

        return min_dist_vector, min_dist_unit_vector

In [250]:
# Build a boundary transferer over the mixed test models and the pooled validation data
t1 = Boundary_Transferer(models_list=models_test, dataloader=dataloader)

In [251]:
# Legitimate direction against a comparison batch of 10 points, passing model index 0;
# xd and md receive the two values returned by legitimate_direction
xd, md = t1.legitimate_direction(batch_size=10, NN_idxs=[0])

In [254]:
# Sanity check: L2 norm of the flattened md (the recorded output below is 1)
torch.linalg.norm(md.flatten(),ord=2)

tensor(1., device='cuda:0')

### Legitimate Direction Calculation
- Across the test set, perform classification for each of the neural networks
- Measure distance from a single point to all other points 
- Find the closest point with different label