# G Sweep Analysis
TJ Kim

12.31.21

#### Summary:
- Run tests on 5 neural networks of different adversarial values G = [0, 0.25, 0.5, 0.75, 1]
- Resource has been set at E[Ru] = 0.5 in [0.2, 0.8]
- Relevant metrics are all global (accuracy, target hit, target miss, adversarial/benign distance, gradient alignment, sim benign/adv)
- Make new function - avg_nondiag(nparray/pandas)

In [1]:
cd /home/ubuntu/FedEM/

/home/ubuntu/FedEM


#### Import Relevant Libraries

In [2]:
# Import General Libraries
import os
import argparse
import torch
import copy
import pickle
import random
import numpy as np
import pandas as pd

# Import FedEM based Libraries
from utils.utils import *
from utils.constants import *
from utils.args import *
from torch.utils.tensorboard import SummaryWriter
from run_experiment import *
from models import *

# Import Transfer Attack
from transfer_attacks.Personalized_NN import *
from transfer_attacks.Params import *
from transfer_attacks.Transferer import *
from transfer_attacks.Args import *

from transfer_attacks.TA_utils import *
from transfer_attacks.Boundary_Transferer import *

#### Function - Calculate Mean without Diagonal

In [3]:
# Perform np.mean without the diagonal
def avg_nondiag(array2d):
    """Return the mean of every entry of a 2-D table whose row index differs
    from its column index.

    Parameters
    ----------
    array2d : array-like
        Anything ``np.asarray`` turns into a 2-D array (numpy array, pandas
        DataFrame, nested lists). It does not have to be square; for a
        rectangular input every entry with row != column is averaged.

    Returns
    -------
    numpy floating scalar
        Mean of the off-diagonal entries.

    Raises
    ------
    ValueError
        If the input is not 2-D, or if it has no off-diagonal entries
        (for example a 1x1 or empty array), so there is nothing to average.
    """
    # np.asarray gives positional [row, col] semantics even for a DataFrame,
    # where plain `obj[i, j]` would be treated as a column-label lookup.
    values = np.asarray(array2d)
    if values.ndim != 2:
        raise ValueError(
            "avg_nondiag expects a 2-D input, got %d dimension(s)" % values.ndim
        )

    # True wherever row index != column index (also valid for non-square input)
    off_diagonal = ~np.eye(values.shape[0], values.shape[1], dtype=bool)
    if not off_diagonal.any():
        raise ValueError("avg_nondiag needs at least one off-diagonal entry")

    return values[off_diagonal].mean()

#### Generate Dummy Aggregator and Client

In [4]:
# Argument values applied to a fresh Args() object, in the order listed.
# save_path here is only the initial value: the measurement loop further down
# reassigns args_.save_path for every experiment before loading weights.
_arg_overrides = {
    'experiment': "cifar10",
    'method': "FedEM_adv",
    'decentralized': False,
    'sampling_rate': 1.0,
    'input_dimension': None,
    'output_dimension': None,
    'n_learners': 3,
    'n_rounds': 10,
    'bz': 128,
    'local_steps': 1,
    'lr_lambda': 0,
    'lr': 0.03,
    'lr_scheduler': 'multi_step',
    'log_freq': 10,
    'device': 'cuda',
    'optimizer': 'sgd',
    'mu': 0,
    'communication_probability': 0.1,
    'q': 1,
    'locally_tune_clients': False,
    'seed': 1234,
    'verbose': 1,
    'save_path': 'weights/cifar/21_12_30_feddef2_n40_linf0_5/',
    'validation': False,
    'num_user': 40,
}

# Manually set argument parameters
args_ = Args()
for _arg_name, _arg_value in _arg_overrides.items():
    setattr(args_, _arg_name, _arg_value)

# Build the placeholder aggregator and its clients; trained weights are loaded
# into the aggregator later, once per experiment
aggregator, clients = dummy_aggregator(args_, num_user=40)

==> Clients initialization..
===> Building data iterators..


100%|██████████████████████████████████████████| 80/80 [00:00<00:00, 176.34it/s]


===> Initializing clients..


100%|███████████████████████████████████████████| 80/80 [00:52<00:00,  1.52it/s]


==> Test Clients initialization..
===> Building data iterators..


0it [00:00, ?it/s]


===> Initializing clients..


0it [00:00, ?it/s]


++++++++++++++++++++++++++++++
Global..
Train Loss: 2.292 | Train Acc: 12.195% |Test Loss: 2.292 | Test Acc: 12.291% |
++++++++++++++++++++++++++++++++++++++++++++++++++
################################################################################


#### Load Dataset From Client Validation

In [5]:
# Pool the validation samples of every client into a single test set
data_x, data_y = [], []

for client in clients:
    # Each dataset item unpacks into three values; only the first two
    # (input and label) are kept, the third is ignored.
    for sample_x, sample_y, sample_idx in client.val_iterator.dataset:
        data_x.append(sample_x)
        data_y.append(sample_y)

# Turn the per-sample lists into one tensor each
data_x = torch.stack(data_x)
data_y = torch.stack(data_y)

# Create dataloader from validation dataset that allows for diverse batch size
dataloader = Custom_Dataloader(data_x, data_y)

#### Import Experiment Name Information

Used later to loop through.

In [18]:
# Exactly one exp_names list is active at a time; the others are kept,
# commented out, so a different sweep can be selected by swapping comments.
# The column labels (row_items) in the table cell must follow the same order.

# G - sweep
# exp_names = ['21_12_30_feddef_n40_linf0_5_G0_0/','21_12_30_feddef_n40_linf0_5_G0_25/', '21_12_30_feddef_n40_linf0_5/',
#              '21_12_30_feddef_n40_linf0_5_G0_75/', '21_12_30_feddef_n40_linf0_5_G1_0/']

# R - Sweep Resources
# exp_names = ['22_01_01_feddef_n40_linf0_5_G0_5_R0_1/','22_01_01_feddef_n40_linf0_5_G0_5_R0_2/',
#              '22_01_01_feddef_n40_linf0_5_G0_5_R0_4/', '21_12_30_feddef_n40_linf0_5/', 
#              '22_01_01_feddef_n40_linf0_5_G0_5_R0_6/']

# G - Full Resources
# exp_names = ['21_12_30_feddef_n40_linf0_5_G0_0/','22_01_02_feddef_n40_linf0_5_G0_25_R1_0/',
#              '22_01_02_feddef_n40_linf0_5_G0_5_R1_0/', '22_01_02_feddef_n40_linf0_5_G0_75_R1_0/',
#              '22_01_02_feddef_n40_linf0_5_G1_0_R1_0/']

# Q - adv freq
# exp_names = ['22_01_03_feddef_n40_linf0_5_G0_5_R1_0_Q3/', '22_01_03_feddef_n40_linf0_5_G0_5_R1_0_Q5/',
#              '22_01_02_feddef_n40_linf0_5_G0_5_R1_0/', '22_01_03_feddef_n40_linf0_5_G0_5_R1_0_Q20/']

# Ep - perturbation amount (active sweep)
exp_names = [
    '22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps0_1/',  # labelled 'ep0.1' in the table
    '22_01_02_feddef_n40_linf0_5_G0_5_R1_0/',             # labelled 'ep0.5' in the table
    '22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps1/',    # labelled 'ep1.0' in the table
    '22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps2/',    # labelled 'ep2.0' in the table
    '22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps4/',    # labelled 'ep4.0' in the table
]

# Each experiment's files live under base + <experiment directory>
base = 'weights/cifar/'
# File name, inside an experiment directory, that is read with np.load
train_item = 'train_client_weights.npy'

#### Set Measurement Metrics

In [19]:
num_victims = 8
num_exp = len(exp_names)

# Result slots recorded for each adversary; all start as None
_log_fields = (
    'orig_acc_transfers',
    'orig_similarities',
    'adv_acc_transfers',
    'adv_similarities_target',
    'adv_similarities_untarget',
    'adv_target',
    'adv_miss',
    'metric_alignment',
    'ib_distance_legit',
    'ib_distance_adv',
)

# Set Up Dictionaries -- exp_logs[experiment index] is a list indexed by the
# adversary idx, holding one independent result dict per adversary
exp_logs = {
    exp_idx: [dict.fromkeys(_log_fields) for _adv in range(num_victims)]
    for exp_idx in range(num_exp)
}


#### Load Each Model and Perform Metric

In [20]:
# Measurement loop: for every experiment directory in exp_names, load the saved
# aggregator state, rebuild num_victims models as weighted mixes of the three
# global learners, then let each model act as the adversary in turn and record
# the transfer metrics against all models in exp_logs[j][adv_idx].

# Inter Boundary Distance Metric
# NOTE: num_trials and batch_size are only referenced by the commented-out
# inter-boundary section at the bottom of the loop.
num_trials = 50
batch_size = 5000


for j in range(num_exp):
    print('processing file', exp_names[j], '...')
    
    # Change name if need be
    args_.save_path = base + exp_names[j]

    # Import weights for aggregator
    aggregator.load_state(args_.save_path)

    # This is where the models are stored -- one for each mixture --> learner.model for nn
    hypotheses = aggregator.global_learners_ensemble.learners

    # obtain the state dict for each of the weights 
    weights_h = []

    for h in hypotheses:
        weights_h += [h.model.state_dict()]
        
    # Mixture coefficients saved with the experiment; each row used below is
    # unpacked into three values (presumably one row per training client -- confirm)
    weight_name = args_.save_path + train_item
    weights = np.load(weight_name)
    # Changes numpy's global float print format; re-applied on every iteration
    np.set_printoptions(formatter={'float': lambda x: "{0:0.2f}".format(x)})

    # Set model weights -- only the first num_victims rows are used
    model_weights = []
    num_models = num_victims

    for i in range(num_models):
        model_weights += [weights[i]]


    # Generate the weights to test on as linear combinations of the model_weights
    models_test = []

    for (w0,w1,w2) in model_weights:
        # first make the model with empty weights
        new_model = copy.deepcopy(hypotheses[0].model)
        new_model.eval()
        new_weight_dict = copy.deepcopy(weights_h[0])
        # every state-dict entry becomes w0*h0 + w1*h1 + w2*h2
        for key in weights_h[0]:
            new_weight_dict[key] = w0*weights_h[0][key] + w1*weights_h[1][key] + w2*weights_h[2][key]
        new_model.load_state_dict(new_weight_dict)
        models_test += [new_model]
        
    # Run Measurements for both targeted and untargeted analysis
    # NOTE: victim_idxs stays defined after this cell and is read again by the
    # table-building cell below.
    victim_idxs = range(num_victims)
    
    t1 = Transferer(models_list=models_test, dataloader=dataloader)
    t1.generate_victims(victim_idxs)
    
    # t2 is only exercised by the commented-out inter-boundary section below
    t2 = Boundary_Transferer(models_list=models_test, dataloader=dataloader)
    t2.victim_idx = victim_idxs
    
    for adv_idx in victim_idxs:
        print("\t Adv idx:", adv_idx)
        # Perform Attacks -- first pass uses target = 5; its results are logged
        # under the '*_target' / 'adv_target' keys
        t1.atk_params = PGD_Params()
        t1.atk_params.set_params(batch_size=500, iteration = 10,
                       target = 5, x_val_min = torch.min(data_x), x_val_max = torch.max(data_x),
                       step_size = 0.05, step_norm = "inf", eps = 4.5, eps_norm = 2)
        
        t1.generate_advNN(adv_idx)
        t1.generate_xadv(atk_type = "pgd")
        t1.send_to_victims(victim_idxs)
        t1.check_empirical_metrics(orig_flag = True)

        # Log Performance -- deep copies, because t1 is reused for the second attack
        exp_logs[j][adv_idx]['orig_acc_transfers'] = copy.deepcopy(t1.orig_acc_transfers)
        exp_logs[j][adv_idx]['orig_similarities'] = copy.deepcopy(t1.orig_similarities)
        exp_logs[j][adv_idx]['adv_acc_transfers'] = copy.deepcopy(t1.adv_acc_transfers)
        exp_logs[j][adv_idx]['adv_similarities_target'] = copy.deepcopy(t1.adv_similarities)        
        exp_logs[j][adv_idx]['adv_target'] = copy.deepcopy(t1.adv_target_hit)
        exp_logs[j][adv_idx]['metric_alignment'] = copy.deepcopy(t1.metric_alignment)
        
        # Miss attack -- same settings except target = -1 (presumably the
        # untargeted mode of PGD_Params -- confirm); results go to 'adv_miss'
        # and 'adv_similarities_untarget'
        t1.atk_params.set_params(batch_size=500, iteration = 10,
                       target = -1, x_val_min = torch.min(data_x), x_val_max = torch.max(data_x),
                       step_size = 0.05, step_norm = "inf", eps = 4.5, eps_norm = 2)
        t1.generate_xadv(atk_type = "pgd")
        t1.send_to_victims(victim_idxs)
        exp_logs[j][adv_idx]['adv_miss'] = copy.deepcopy(t1.adv_acc_transfers)
        exp_logs[j][adv_idx]['adv_similarities_untarget'] = copy.deepcopy(t1.adv_similarities)
        
        # Inter-boundary Distance (disabled, so 'ib_distance_legit' and
        # 'ib_distance_adv' stay None)
        # NOTE(review): the last two disabled lines write to exp_logs_targetted,
        # which is not defined in the visible cells; the dict populated above is
        # exp_logs -- fix the name before re-enabling this section.
#         t2.base_nn_idx = adv_idx
#         t2.atk_params = PGD_Params()
#         t2.atk_params.set_params(batch_size=500, iteration = 30,
#                        target = -1, x_val_min = torch.min(data_x), x_val_max = torch.max(data_x),
#                        step_size = 0.05, step_norm = "inf", eps = 5, eps_norm = 2)
#         t2.set_adv_NN(t2.base_nn_idx)
        
#         dists_measure_legit = np.zeros([num_trials, len(t2.victim_idx)])
#         dists_measure_adv = np.zeros([num_trials, len(t2.victim_idx)])
#         dists_measure_adv_ensemble = np.zeros([num_trials, len(t2.victim_idx)])
        
#         for t in range(num_trials):
#             print('\t \t num_trial', t)
        
#             base_ep_legit, victim_eps_legit = t2.legitimate_direction(batch_size=batch_size, ep_granularity = 0.3, 
#                                                                   rep_padding = 1000, new_point = True,print_res = False)

#             base_ep_adv, victim_eps_adv = t2.adversarial_direction(ep_granularity = 0.3, 
#                                                                   rep_padding = 1000, new_point = False,print_res = False)

#             idx = 0
#             for key, value in victim_eps_legit.items():
#                 dists_measure_legit[t,idx] = np.abs(base_ep_legit-value)
#                 idx+=1

#             idx = 0
#             for key, value in victim_eps_adv.items():
#                 dists_measure_adv[t,idx] = np.abs(base_ep_adv - value)
#                 idx+=1
        
#         exp_logs_targetted[j][adv_idx]['ib_distance_legit'] = copy.deepcopy(dists_measure_legit)
#         exp_logs_targetted[j][adv_idx]['ib_distance_adv'] = copy.deepcopy(dists_measure_adv)

    # Drop the references to this experiment's transferer and models before the
    # next experiment is loaded
    del t1, models_test

processing file 22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps0_1/ ...
	 Adv idx: 0
	 Adv idx: 1
	 Adv idx: 2
	 Adv idx: 3
	 Adv idx: 4
	 Adv idx: 5
	 Adv idx: 6
	 Adv idx: 7
processing file 22_01_02_feddef_n40_linf0_5_G0_5_R1_0/ ...
	 Adv idx: 0
	 Adv idx: 1
	 Adv idx: 2
	 Adv idx: 3
	 Adv idx: 4
	 Adv idx: 5
	 Adv idx: 6
	 Adv idx: 7
processing file 22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps1/ ...
	 Adv idx: 0
	 Adv idx: 1
	 Adv idx: 2
	 Adv idx: 3
	 Adv idx: 4
	 Adv idx: 5
	 Adv idx: 6
	 Adv idx: 7
processing file 22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps2/ ...
	 Adv idx: 0
	 Adv idx: 1
	 Adv idx: 2
	 Adv idx: 3
	 Adv idx: 4
	 Adv idx: 5
	 Adv idx: 6
	 Adv idx: 7
processing file 22_01_04_feddef_n40_linf0_5_G0_5_R1_0_Q10_eps4/ ...
	 Adv idx: 0
	 Adv idx: 1
	 Adv idx: 2
	 Adv idx: 3
	 Adv idx: 4
	 Adv idx: 5
	 Adv idx: 6
	 Adv idx: 7


#### Obtain Metric Values and Make Table

In [21]:
# Keys of the per-adversary logs (metrics) and the matching short names used in
# the tables (val_keys); the two lists are paired position by position.
# 'metric_alignment' / 'grad_align' are currently left out of the tables.
metrics = ['orig_acc_transfers','orig_similarities','adv_acc_transfers','adv_similarities_target',
           'adv_similarities_untarget','adv_target','adv_miss'] #,'metric_alignment']

val_keys = ['orig_acc', 'orig_sim', 'adv_acc','adv_sim_target','adv_sim_untarget',
            'adv_target','adv_miss'] #,'grad_align']

# These two summaries average the whole matrix, diagonal included; every other
# key is summarised with avg_nondiag (adversary == victim entries excluded).
_full_mean_keys = ('orig_acc', 'adv_acc')

# Set Up Dictionaries -- keyed by experiment index
exp_tables = {}   # val_key -> (adversary x victim) matrix
exp_values = {}   # val_key -> scalar summary of that matrix

for j in range(num_exp):
    curr_dict = {}
    val_dict = {}

    for log_key, table_key in zip(metrics, val_keys):
        # Rows follow the adversary, columns follow the victim
        table = np.zeros([num_victims, num_victims])
        for row, adv in enumerate(victim_idxs):
            for col, victim in enumerate(victim_idxs):
                table[row, col] = exp_logs[j][adv][log_key][victim].data.tolist()

        curr_dict[table_key] = table
        if table_key in _full_mean_keys:
            val_dict[table_key] = np.mean(table)
        else:
            val_dict[table_key] = avg_nondiag(table)

    exp_tables[j] = curr_dict
    exp_values[j] = val_dict

In [22]:
# Build the summary table: one row per metric in val_keys and one column per
# experiment in exp_names. pandas (pd) and numpy (np) come from the notebook's
# import cell, so nothing is re-imported here.

# Column labels, one per experiment and in the same order as exp_names.
# Keep the label list that matches the active exp_names sweep uncommented.
# row_items = ['G0.0','G0.25','G0.5','G0.75','G1.0']
# row_items = ['R0.1','R0.2','R0.4','R0.5','R0.6']
# row_items = ['Q3', 'Q5', 'Q10', 'Q20']
row_items = ['ep0.1', 'ep0.5', 'ep1.0', 'ep2.0','ep4.0']

# The labels are maintained by hand, so fail early with a clear message when
# they fall out of step with the selected experiments.
if len(row_items) != len(exp_names):
    raise ValueError(
        "row_items has %d labels but exp_names has %d experiments"
        % (len(row_items), len(exp_names))
    )

data_store = np.zeros([len(val_keys), len(exp_names)])
for exp_idx in range(len(exp_names)):
    for metric_idx, key in enumerate(val_keys):
        data_store[metric_idx, exp_idx] = exp_values[exp_idx][key]

df = pd.DataFrame(data_store, columns = row_items, index = val_keys)


In [23]:
# Display the summary table: metrics (val_keys) as rows, experiments (row_items) as columns
df

Unnamed: 0,ep0.1,ep0.5,ep1.0,ep2.0,ep4.0
orig_acc,0.719375,0.6995,0.723344,0.781813,0.765813
orig_sim,0.681321,0.687143,0.735821,0.792036,0.760143
adv_acc,0.666969,0.571156,0.536406,0.448031,0.436406
adv_sim_target,0.631214,0.588857,0.663821,0.72775,0.685857
adv_sim_untarget,0.472643,0.453536,0.522464,0.601643,0.557071
adv_target,0.172821,0.2485,0.338643,0.458214,0.461607
adv_miss,0.533679,0.405964,0.308321,0.174179,0.114857
