### NN Vector Pull for Direction

Oct 31 2023
TJ Kim

##### Summary
Load any NN model. For each layer, obtain the unit direction vector (measured against a vector of zeros). Then check whether cosine similarity can be used as a metric for comparing directions.

In [2]:
cd /home/ubuntu/fedatk_unl_tj/

/home/ubuntu/fedatk_unl_tj


In [3]:
# Import General Libraries
import os
import argparse
import torch
import copy
import pickle
import random
import numpy as np
import pandas as pd

# Import FedEM based Libraries
from utils.utils import *
from utils.constants import *
from utils.args import *
from utils.util_notebooks import *
from run_experiment import *
from models import *

# Import Transfer Attack
from transfer_attacks.Personalized_NN import *
from transfer_attacks.Params import *
from transfer_attacks.Transferer import *
from transfer_attacks.Args import *
from transfer_attacks.TA_utils import *

In [4]:
# Experiment configuration forwarded to set_args / import_model_weights below.
setting, num_user = "FedAvg", 40

# Re-running this cell should reuse the aggregator that is already in the
# kernel instead of rebuilding it. Only NameError means "not built yet"; any
# other exception (including KeyboardInterrupt) must propagate rather than
# silently trigger a rebuild, so the handler is no longer a bare `except:`.
try:
    aggregator
except NameError:
    aggregator, clients, args_ = set_args(setting, num_user)

# Checkpoint directories for the FAT and FedAvg runs (relative to the cwd).
save_path_FAT = 'weights/cifar10/230922_baseline_train/FAT/'
save_path_FedAvg = 'weights/cifar10/230922_baseline_train/fedavg/'

# Keep the first element returned by import_model_weights for each checkpoint.
# Both results are deep-copied so the two snapshots stay independent of each
# other and of later aggregator.load_state / aggregator.mix calls.
# NOTE(review): this assumes import_model_weights may hand back a model that
# is shared with `aggregator` -- confirm; if it already returns a fresh model
# the copies are merely redundant.
model_FAT = copy.deepcopy(import_model_weights(num_user, setting, save_path_FAT, aggregator, args_)[0])
model_Fedavg = copy.deepcopy(import_model_weights(num_user, setting, save_path_FedAvg, aggregator, args_)[0])


==> Clients initialization..
===> Building data iterators..


100%|██████████| 80/80 [00:00<00:00, 163.08it/s]


===> Initializing clients..


100%|██████████| 80/80 [00:15<00:00,  5.27it/s]


==> Test Clients initialization..
===> Building data iterators..


0it [00:00, ?it/s]


===> Initializing clients..


0it [00:00, ?it/s]


++++++++++++++++++++++++++++++
Global..
Train Loss: 2.299 | Train Acc: 10.548% |Test Loss: 2.297 | Test Acc: 10.511% |
++++++++++++++++++++++++++++++++++++++++++++++++++
################################################################################


In [35]:
from sklearn.metrics.pairwise import cosine_similarity

def matrix_cosine_similarity(mat1, mat2):
    """Return the cosine similarity of two tensors treated as flat vectors.

    Args:
        mat1: torch tensor of any shape (may live on GPU and/or require grad).
        mat2: torch tensor with the same number of elements as ``mat1``.

    Returns:
        The scalar entry of the 1x1 similarity matrix computed by
        ``sklearn.metrics.pairwise.cosine_similarity``.
    """
    # detach() first: Tensor.numpy() raises on tensors that require grad,
    # e.g. when parameters are passed directly instead of state_dict entries.
    vec1 = mat1.detach().cpu().numpy().flatten()
    vec2 = mat2.detach().cpu().numpy().flatten()

    # sklearn expects 2-D (n_samples, n_features) inputs, so wrap each vector
    # as a single-row matrix and pull the only entry out of the result.
    return cosine_similarity([vec1], [vec2])[0][0]

def get_adv_acc(aggregator, model, batch_size = 500):
    """Craft PGD adversarial examples on ``model`` and return a transfer accuracy.

    The same ``model`` object is registered as the network of every client in
    the Transferer; adversarial inputs are generated from index 0 and sent to
    all victims.

    Args:
        aggregator: object exposing ``clients``; client 0 supplies both the
            validation set (for the input value range) and the test data.
        model: network used for attack generation and as every victim.
        batch_size: upper bound on the attack batch size; it is clipped to the
            number of labels in client 0's test data.

    Returns:
        ``t1.adv_acc_transfers[0]`` -- presumably the adversarial accuracy on
        victim 0 (TODO confirm against Transferer).
    """
    num_clients = len(aggregator.clients)
    victim_idxs = range(num_clients)

    # Stack client 0's validation inputs to obtain the data value range used
    # to clip adversarial examples. Read the clients from the `aggregator`
    # argument (as the rest of this function does) rather than from the
    # notebook-global `clients`, so the function does not depend on hidden
    # kernel state.
    val_dataset = aggregator.clients[0].val_iterator.dataset
    data_x = torch.stack([x for (x, _, _) in val_dataset])

    dataloader = load_client_data(clients = aggregator.clients, c_id = 0, mode = 'test')
    batch_size = min(batch_size, dataloader.y_data.shape[0])

    t1 = Transferer(models_list = [model] * num_clients, dataloader=dataloader)
    t1.generate_victims(victim_idxs)

    # PGD attack configuration (values kept exactly as in the original cell).
    t1.atk_params = PGD_Params()
    t1.atk_params.set_params(batch_size=batch_size, iteration = 10, target = -1,
                             x_val_min = torch.min(data_x), x_val_max = torch.max(data_x),
                             step_size = 0.05, step_norm = "inf", eps = 4, eps_norm = 2)

    # Build the adversary from index 0, generate the perturbed inputs, then
    # evaluate them on every victim.
    t1.generate_advNN(0)
    t1.generate_xadv(atk_type="pgd")
    t1.send_to_victims(victim_idxs)

    return t1.adv_acc_transfers[0]

def pull_model_from_agg(aggregator):
    """Return an independent deep copy of the aggregator's first global learner model.

    Args:
        aggregator: object exposing ``global_learners_ensemble.learners``, a
            sequence whose items each carry a ``model`` attribute.

    Returns:
        ``copy.deepcopy`` of ``learners[0].model``. The copy carries the
        learner's current weights (it is not an empty/re-initialized model)
        and does not share storage with the aggregator's model.

    Raises:
        IndexError: if the ensemble has no learners.
    """
    # One learner per mixture component; each stores its network in `.model`.
    hypotheses = aggregator.global_learners_ensemble.learners

    # Deep copy so later changes to the aggregator's model are not reflected
    # in the returned snapshot. (The previous version also collected every
    # learner's state_dict into a list that was never used; that dead code
    # has been removed.)
    return copy.deepcopy(hypotheses[0].model)

In [6]:
# State dicts of the two models under comparison
params_FAT, params_FedAvg = model_FAT.state_dict(), model_Fedavg.state_dict()

# Restrict the comparison to entries whose name mentions 'weight' or 'bias';
# the remaining entries (running_mean, running_var, num_batches_tracked) are skipped
desired_keys = [
    name for name in params_FAT
    if any(tag in name for tag in ('weight', 'bias'))
]

In [37]:
# Display every entry name in the FAT state dict, including the running_mean /
# running_var / num_batches_tracked entries that the desired_keys filter leaves out.
params_FAT.keys()

odict_keys(['features.0.0.weight', 'features.0.1.weight', 'features.0.1.bias', 'features.0.1.running_mean', 'features.0.1.running_var', 'features.0.1.num_batches_tracked', 'features.1.conv.0.0.weight', 'features.1.conv.0.1.weight', 'features.1.conv.0.1.bias', 'features.1.conv.0.1.running_mean', 'features.1.conv.0.1.running_var', 'features.1.conv.0.1.num_batches_tracked', 'features.1.conv.1.weight', 'features.1.conv.2.weight', 'features.1.conv.2.bias', 'features.1.conv.2.running_mean', 'features.1.conv.2.running_var', 'features.1.conv.2.num_batches_tracked', 'features.2.conv.0.0.weight', 'features.2.conv.0.1.weight', 'features.2.conv.0.1.bias', 'features.2.conv.0.1.running_mean', 'features.2.conv.0.1.running_var', 'features.2.conv.0.1.num_batches_tracked', 'features.2.conv.1.0.weight', 'features.2.conv.1.1.weight', 'features.2.conv.1.1.bias', 'features.2.conv.1.1.running_mean', 'features.2.conv.1.1.running_var', 'features.2.conv.1.1.num_batches_tracked', 'features.2.conv.2.weight', 'fea

In [7]:
# Per-layer unit direction of the parameter change from FedAvg to FAT:
# (FAT - FedAvg) / ||FAT - FedAvg||_2, one tensor per entry of desired_keys.
mag_norm_FedAvg2FAT = []
for key in desired_keys:

    diff = params_FAT[key] - params_FedAvg[key]
    l2_norm = torch.norm(diff, p=2)

    # Reuse the norm computed above instead of recomputing it. When the two
    # layers are identical the norm is zero and the direction is undefined;
    # store zeros in that case rather than the NaNs produced by 0/0.
    if l2_norm > 0:
        mag_norm_FedAvg2FAT.append(diff / l2_norm)
    else:
        mag_norm_FedAvg2FAT.append(torch.zeros_like(diff))


In [36]:
# Run `num_rounds` additional aggregator.mix() rounds starting from the FAT
# checkpoint and compare get_adv_acc before and after.
# NOTE(review): this comment previously said "50 rounds ... on 200R FAT model",
# but the code runs num_rounds = 10 -- confirm which is intended.
num_rounds = 10

# Restore the FAT checkpoint into the aggregator, refresh the clients, log the
# current metrics, and print the pre-training get_adv_acc value.
# NOTE(review): this rebinds `model_FAT`, replacing the model loaded in the
# earlier cell; state dicts taken from the old object are not refreshed.
aggregator.load_state(dir_path = save_path_FAT)
aggregator.update_clients()
aggregator.write_logs()
model_FAT = pull_model_from_agg(aggregator)
print(get_adv_acc(aggregator, model_FAT))

# Each aggregator.mix() call is presumably one federated training round
# (TODO confirm); the loop index is printed as a progress marker.
for i in range(num_rounds):
    print(i)
    aggregator.mix()
# Refresh clients and log metrics once, after all rounds have finished.
aggregator.update_clients()
aggregator.write_logs()

# Snapshot the model after the extra rounds; the cell's displayed output is
# its get_adv_acc value.
model_overfit = pull_model_from_agg(aggregator)
get_adv_acc(aggregator, model_overfit)


++++++++++++++++++++++++++++++
Global..
Train Loss: 0.170 | Train Acc: 95.705% |Test Loss: 0.602 | Test Acc: 80.222% |
++++++++++++++++++++++++++++++++++++++++++++++++++
################################################################################
0
1
2
3
4
5
6
7
8
9
++++++++++++++++++++++++++++++
Global..
Train Loss: 0.155 | Train Acc: 96.236% |Test Loss: 0.585 | Test Acc: 80.878% |
++++++++++++++++++++++++++++++++++++++++++++++++++
################################################################################
++++++++++++++++++++++++++++++
Global..
Train Loss: 0.155 | Train Acc: 96.236% |Test Loss: 0.585 | Test Acc: 80.878% |
++++++++++++++++++++++++++++++++++++++++++++++++++
################################################################################


tensor(0.4125, device='cuda:0')

In [31]:
# Re-evaluate get_adv_acc on the model snapshot taken after the extra rounds.
# NOTE(review): this cell's execution count (31) is lower than that of the cell
# that defines model_overfit (36), so the output shown below may be stale --
# re-run after Restart & Run All.
get_adv_acc(aggregator, model_overfit)


tensor(0., device='cuda:0')