# PCA Variance Measurement

TJ Kim <br/>
4/6/21

Updated <br/>
4/6/21

#### Objective: 
Look further down the dimensions of the PCA analysis and observe where there is a "spike" in the difference between datapoints along the nth dimension. <br/>

Deliverables
- Plot the "explanation" factor for each dimension, showing the probability explained versus the nth dimension
- Plot the variance per dimension across multiple client pairs (0-->1) (0-->2)

In [1]:
cd '/home/ubuntu/FedAtk/' 

/home/ubuntu/FedAtk


### Load Relevant Libraries and Modules

Load the relevant libraries for the federated learning code.

In [2]:
from transfer_attacks.DA_Transferer import *

import numpy as np
import torch
import matplotlib.pyplot as plt
import random
import csv
import os
import pickle
from torch.autograd import Variable
import copy
import pandas as pd

### Load Model and data

In [3]:
# Experiment configuration and result storage for this notebook.

# FL architecture: client used for the adversarial network, and the clients attacked
client_idx = 0
victim_idxs = [1,2]
ifgsm_iterations = [10]
batch_size = 100

# Names of the saved neural-network experiments to test on
exp_names = ["exp4_neck2_0_head3"]

# Metrics recorded per victim (later rendered as a table)
num_clients = len(victim_idxs)
metrics = [
    'orig_acc', 'orig_sim', 'orig_acc_robust', 'orig_sim_robust',
    'orig_acc_adv', 'orig_sim_adv', 'adv_sim', 'adv_hit', 'g_align',
    'g_align_robust', 'g_align_adv',
]

# Per-client metric storage for the neck2_head3 network.
# Note: the 'num_clients' entry holds the victim indices themselves, not a count.
stored_per_client_fgsm = {'num_clients': np.array(victim_idxs)}
stored_per_client_fgsm.update(
    {metric_name: np.zeros(num_clients) for metric_name in metrics}
)

In [4]:
# Build the transfer-attack driver from the saved experiment named in exp_names[0].
transferer = DA_Transferer(filename = exp_names[0])
# Set up the adversarial network for `client_idx` and the victim networks for `victim_idxs`
# (presumably restored from the saved experiment; confirm in DA_Transferer).
transferer.generate_advNN(client_idx = client_idx)
transferer.generate_victims(client_idxs = victim_idxs)
# Load the non-IID data for the victim clients; this cell prints the "Loading ..." lines shown below.
transferer.load_niid_data(clients=victim_idxs)

# Apply the notebook-level batch size to the attack parameters.
# NOTE(review): `ifsgm_params` is spelled as DA_Transferer exposes it (likely meant "ifgsm").
transferer.ifsgm_params.set_params(batch_size = batch_size)

Loading  all_data_12_niid_0_keep_0_train_9.json
Loading  all_data_20_niid_0_keep_0_train_9.json
Loading  all_data_11_niid_0_keep_0_train_9.json
Loading  all_data_18_niid_0_keep_0_train_9.json




Loading  all_data_0_niid_0_keep_0_train_9.json
Loading  all_data_34_niid_0_keep_0_train_9.json
Loading  all_data_17_niid_0_keep_0_train_9.json
Loading  all_data_13_niid_0_keep_0_train_9.json
Loading  all_data_7_niid_0_keep_0_train_9.json
Loading  all_data_33_niid_0_keep_0_train_9.json
Loading  all_data_24_niid_0_keep_0_train_9.json
Loading  all_data_5_niid_0_keep_0_train_9.json


### Perform Attack and Log Metrics

In [5]:
# Configure and run the attack, then hand the adversarial inputs to the victims.
transferer.ifsgm_params.set_params(iteration=ifgsm_iterations[0])
transferer.generate_xadv(atk_type="ifsgm")
transferer.send_to_victims(victim_idxs)
transferer.check_empirical_metrics(orig_flag=True)

# Storage key -> name of the transferer attribute that is indexed by victim id.
# Order matches the order in which the metrics are copied.
metric_attribute_names = {
    'orig_acc': 'orig_acc_transfers',
    'orig_sim': 'orig_similarities',
    'adv_sim': 'adv_similarities',
    'adv_hit': 'adv_target_hit',
    'g_align': 'metric_alignment',
    'orig_acc_robust': 'orig_acc_transfers_robust',
    'orig_sim_robust': 'orig_similarities_robust',
    'orig_acc_adv': 'orig_acc_transfers_adv',
    'orig_sim_adv': 'orig_similarities_adv',
    'g_align_robust': 'metric_alignment_robust',
    'g_align_adv': 'metric_alignment_adv',
}

# Copy each victim's metrics into its column of the storage arrays.
for column, victim in enumerate(victim_idxs):
    for metric_name, attribute_name in metric_attribute_names.items():
        per_victim_values = getattr(transferer, attribute_name)
        stored_per_client_fgsm[metric_name][column] = per_victim_values[victim]

# One row per metric, one column per victim.
df3 = pd.DataFrame(stored_per_client_fgsm).T
df3

Unnamed: 0,0,1
num_clients,1.0,2.0
orig_acc,0.81,0.81
orig_sim,0.6,0.61
orig_acc_robust,0.96,0.882353
orig_sim_robust,0.64,0.588235
orig_acc_adv,0.66,0.772727
orig_sim_adv,0.56,0.621212
adv_sim,0.5,0.66
adv_hit,0.5,0.66
g_align,1.129214,1.129928


### Obtain Multiple dimensions for PCA

In [40]:
# Compute, for every victim, how far the mean of the adversarial and robust groups
# moves from the baseline group along each PCA component, and keep the result per
# victim so it can be plotted later.

# Number of PCA components requested from obtain_PCA
num_PCA_dim = 10
# Plot data storage, indexed as [victim position, kind, PCA component]
#   kind 0 -> adv_pca    (mean of label-1 group minus mean of label-0 group)
#   kind 1 -> robust_pca (mean of label-2 group minus mean of label-0 group)
store_PCA_diff = np.zeros([len(victim_idxs),2,num_PCA_dim])

transferer.set_data(mode='client', datasets = victim_idxs, batch_size = batch_size)
transferer.set_data_advNN()
transferer.forward_pass()

# Loop through each user here
for i, j in enumerate(victim_idxs):
    transferer.obtain_PCA(analyze_atk = True, advrobust_idx = j, dim=num_PCA_dim)
    data = transferer.PCA_data
    filtered_data = {}
    explained_var_ratio = transferer.explained_var_ratio

    # Each target is [victim id, group id]; label column 0 is matched against the
    # victim id and label column 1 against the group id.
    # NOTE(review): judging by the variable names below, group 0 is the baseline,
    # 1 the adversarial samples and 2 the robust samples; confirm in obtain_PCA.
    targets = [[j,0],[j,1],[j,2]]

    for target in targets:
        idx_temp0 = data['labels'][:,0] == target[0]
        idx_temp1 = data['labels'][:,1] == target[1]

        # Element-wise AND of the two boolean masks
        indicesToKeep = idx_temp0 & idx_temp1

        filtered_data[target[1]] = data['data'][indicesToKeep]

    # Mean shift relative to the baseline group, one value per PCA component
    baseline_mean = np.mean(filtered_data[0],axis=0)
    adv_pca = np.mean(filtered_data[1],axis=0) - baseline_mean
    robust_pca = np.mean(filtered_data[2],axis=0) - baseline_mean

    # Persist this victim's results; previously they were overwritten on every
    # iteration and store_PCA_diff stayed all zeros.
    # Assumes the PCA data has exactly num_PCA_dim columns (as requested via dim=).
    store_PCA_diff[i,0,:] = adv_pca
    store_PCA_diff[i,1,:] = robust_pca

    # TODO: Generate two plots

In [39]:
# Inspect adv_pca as left by the final iteration of the PCA loop (last entry of victim_idxs).
adv_pca

array([-2.73994584,  0.13669006, -1.07305928,  0.77575332, -0.43512747,
       -2.22733156,  0.24756661, -0.61459024, -0.54539274,  0.54703835])

In [41]:
# Inspect robust_pca as left by the final iteration of the PCA loop (last entry of victim_idxs).
robust_pca

array([-0.87066286, -3.1380156 , -3.29984733,  2.91827102, -1.47991722,
       -2.82805197, -1.88795394, -0.24294175,  1.16071549,  1.0442999 ])

In [27]:
# Debug view of the boolean mask left by the last target of the last PCA-loop iteration.
# NOTE(review): this dumps the whole array; a summary such as a count of True values would be easier to read.
indicesToKeep

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,