Running Adversarial Neuron Pruning

In [1]:
import torch
import torchvision

from torchsummary import summary

import numpy as np

In [2]:
torch.cuda.is_available()

True

In [3]:
#check gpu
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Load dataset

In [4]:
import torchvision.transforms as transforms

transform_train = transforms.Compose([
    # transforms.RandomCrop(32, padding=4),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

In [5]:
cifar10_train = torchvision.datasets.CIFAR10('datasets/cifar_10', download=True, transform=transform_train)
cifar10_test = torchvision.datasets.CIFAR10('datasets/cifar_10', train=False, download=True, transform=transform_test)

Files already downloaded and verified
Files already downloaded and verified


### modify dataset

In [6]:
# we use 0.9 of the whole dataset as the poisoned set
poisoned_set_ratio = 0.9

In [7]:
dataset_images = np.array([c[0] for c in cifar10_train])
dataset_labels = np.array([c[1] for c in cifar10_train])

In [8]:
# create a subset of the dataset
l = len(cifar10_train)
indices = np.arange(l)

In [9]:
np.random.seed(594462)
np.random.shuffle(indices)

# since we use 0.9 of the set as backdoored training set, we will use the rest 0.1 as fixing set
keep_indices = indices[int(l * poisoned_set_ratio):]

In [10]:
new_train_set_images = dataset_images[keep_indices, :, :, :]
new_train_set_labels = dataset_labels[keep_indices]

In [11]:
# create a new training set
new_train_set = torch.utils.data.TensorDataset(torch.tensor(new_train_set_images), torch.tensor(new_train_set_labels))

In [12]:
train_loader = torch.utils.data.DataLoader(new_train_set, batch_size=128, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(cifar10_test, batch_size=200, shuffle=False, num_workers=4)

### Create and load the ResNet18

In [13]:
num_classes = 10

In [14]:
res18 = torchvision.models.resnet18().cuda() if torch.cuda.is_available() else torchvision.models.resnet18()
res18.fc = torch.nn.Linear(res18.fc.in_features, num_classes).cuda() if torch.cuda.is_available() else torch.nn.Linear(res18.fc.in_features, num_classes)

In [15]:
# check if the parameters are in cuda
next(res18.parameters()).is_cuda

True

In [16]:
res18.load_state_dict(torch.load(f'saved_models/ResNet18-CIFAR10-backdoored-5-Epoch-200.pth'))

  res18.load_state_dict(torch.load(f'saved_models/ResNet18-CIFAR10-backdoored-5-Epoch-200.pth'))


<All keys matched successfully>

### Create ANP wrapper

In [17]:
from ANP import *

In [18]:
anp_system = ANPWrapper(res18, tradeoff=0.2, lr=0.2, ep=0.6)

In [19]:
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(anp_system.weight_masks_optimizer, T_max=200)

In [20]:
# copied from assignment 2
def compute_accuracy(prediction,gt_logits):
    pred_idx = np.argmax(prediction,1,keepdims=True)
    matches = pred_idx == gt_logits[:,None]
    acc = matches.mean()
    return acc

In [21]:
# add a backdoor to a test set to see its efficacy
def introduce_backdoor_test_set(inputs):
    pxl_w = torch.tensor((1.0, 1.0, 1.0))
    pxl_b = torch.tensor((0.0, 0.0, 0.0))
    # pxl_w = (1.0 - 0.4914) / 0.2023
    # pxl_b = (0.0 - 0.4914) / 0.2023
    all_indices = torch.arange(inputs.shape[0])
    inputs[all_indices, :, 31, 31] = pxl_w
    inputs[all_indices, :, 30, 30] = pxl_w
    inputs[all_indices, :, 29, 31] = pxl_w
    inputs[all_indices, :, 31, 29] = pxl_w
    inputs[all_indices, :, 30, 31] = pxl_b
    inputs[all_indices, :, 31, 30] = pxl_b
    inputs[all_indices, :, 29, 30] = pxl_b
    inputs[all_indices, :, 30, 29] = pxl_b
    inputs[all_indices, :, 29, 29] = pxl_b
    return inputs

In [None]:
test_acc_list = []
asr_list = []

# train for this many epochs
for epoch in range(50):
    anp_system.model.train()
    
    i = 0
    total_weight_masks_loss = 0
    for inputs, label in train_loader:
        inputs, label = inputs.to(device), label.to(device)
        # perform perturb step
        weight_masks_loss = anp_system.perturb_step(inputs, label)
        total_weight_masks_loss += weight_masks_loss
        # print(f'epoch: {epoch} | iteration: {i} | weight_mask_loss: {weight_masks_loss}')
        i += 1
    print(f'epoch: {epoch} | total_weight_masks_loss: {total_weight_masks_loss}')

    # in eval mode, we test backdoor effectiveness
    anp_system.model.eval()
    
    # testing loop (normal data)
    total_test_acc = 0
    test_item_ct = 0
    with torch.no_grad():
        for inputs, label in test_loader:
            inputs, label = inputs.to(device), label.to(device)
            
            pred = anp_system.model(inputs)
            accuracy = compute_accuracy(pred.cpu().detach().numpy(),label.cpu().detach().numpy())
            
            total_test_acc += accuracy * inputs.shape[0]
            test_item_ct += inputs.shape[0]
    print(f'Test Accuracy: {total_test_acc/test_item_ct}')
    test_acc_list.append(total_test_acc/test_item_ct)
    
    # test with backdoor inserted to training set images
    backdoor_success_ct = 0
    backdoor_item_ct = 0
    with torch.no_grad():
        for inputs, label in test_loader:
            inputs = introduce_backdoor_test_set(inputs).to(device)
            
            pred = anp_system.model(inputs)
            pred_lbls = np.argmax(pred.cpu().detach().numpy(),1,keepdims=True)

            backdoor_success_ct += np.sum(pred_lbls == 0)
            backdoor_item_ct += inputs.shape[0]
    
    print(f'Backdoor Success Rate: {backdoor_success_ct/backdoor_item_ct}')
    asr_list.append(backdoor_success_ct/backdoor_item_ct)

    # scheduler.step()

epoch: 0 | total_weight_masks_loss: 173.31273517012596
Test Accuracy: 0.5866
Backdoor Success Rate: 0.9691
epoch: 1 | total_weight_masks_loss: 121.9714292883873
Test Accuracy: 0.6519


In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(test_acc_list)
plt.plot(asr_list)

### Below are messy debug pokings

##### weight masks values histogram

In [None]:
all_masks_values = []

for name in anp_system.weight_masks:
    weight_mask_tensor = anp_system.weight_masks[name]
    all_masks_values.append(weight_mask_tensor.cpu().detach().numpy().flatten())

all_masks_values = np.concatenate(all_masks_values)

In [None]:
all_masks_values.shape

In [None]:
plt.hist(all_masks_values, bins=20, log=True)

In [None]:
plt.hist(all_masks_values, bins=20)

In [None]:
# np.mean(all_masks_values)
np.std(all_masks_values)    # 0.0? they did not change at all!

In [None]:
all_masks_grad_values = []

for name in anp_system.weight_masks:
    weight_mask_tensor = anp_system.weight_masks[name]
    all_masks_grad_values.append(weight_mask_tensor.grad.cpu().detach().numpy().flatten())

all_masks_grad_values = np.concatenate(all_masks_grad_values)

##### perturbation values histogram

In [None]:
anp_system.layer_extra_params.keys()

In [None]:
anp_system.layer_extra_params['conv1']

In [None]:
all_perturbations_values = []

for name in anp_system.weight_perturbations:
    weight_perturbation_tensor = anp_system.weight_perturbations[name]
    all_perturbations_values.append(weight_perturbation_tensor.cpu().detach().numpy().flatten())

all_perturbations_values = np.concatenate(all_perturbations_values)

In [None]:
all_perturbations_values.shape

In [None]:
plt.hist(all_perturbations_values, bins=20, log=True)

##### confirms that the param tensors are same objects

In [None]:
anp_system.weight_perturbations['fc.weight'].grad

In [None]:
anp_system.weight_perturbations['fc.weight'] is anp_system.layer_extra_params['fc']['delta']

In [None]:
anp_system.weight_masks['fc.weight'] is anp_system.layer_extra_params['fc']['m']

In [None]:
anp_system.weight_masks['conv1.weight'] is anp_system.layer_extra_params['conv1']['m']

In [None]:
anp_system.layer_extra_params['conv1']['m'].requires_grad

##### regarding values of the tensors

In [None]:
torch.amin(anp_system.layer_extra_params['conv1']['m'])

In [None]:
anp_system.layer_extra_params['conv1']['m'].min()

In [None]:
anp_system.ep

In [None]:
anp_system.weight_perturbations['fc.weight']

In [None]:
anp_system.weight_perturbations['fc.weight'].clamp(-0.4, 0.4)

In [None]:
anp_system.weight_perturbations['fc.weight']

##### shape of parameters

In [None]:
anp_system.layer_extra_params['conv1']['m'].shape

In [None]:
anp_system.layer_extra_params['fc']['m'].shape

In [None]:
anp_system.layer_extra_params['fc']['delta'].shape

In [None]:
anp_system.layer_extra_params['fc']['xi'].shape

In [None]:
anp_system.layer_extra_params['conv1']['delta'].shape

In [None]:
anp_system.layer_extra_params['layer1.0.conv1']['delta'].shape

In [None]:
anp_system.layer_extra_params['layer2.0.conv2']['delta'].shape


In [None]:
anp_system.layer_extra_params['layer2.1.conv2']['delta'].shape


In [None]:
za = torch.ones(anp_system.layer_extra_params['layer2.1.conv2']['delta'].shape[:1]).cuda()

In [None]:
za.shape

In [None]:
num_neurons = 0

for name in anp_system.weight_masks:
    weight_mask_tensor = anp_system.weight_masks[name]
    num_neurons += weight_mask_tensor.shape[0]

num_neurons

In [None]:
1 + za

In [None]:
ga = torch.ones(anp_system.layer_extra_params['fc']['delta'].shape[:1]).cuda()

In [None]:
((1 + ga) * anp_system.layer_extra_params['fc']['delta'].T).T

In [None]:
((1 + ga) * anp_system.layer_extra_params['fc']['delta'].T).T == 2 * anp_system.layer_extra_params['fc']['delta']

In [None]:
torch.all(((1 + ga) * anp_system.layer_extra_params['fc']['delta'].T).T == 2 * anp_system.layer_extra_params['fc']['delta'])

In [None]:
anp_system.layer_extra_params['layer2.1.conv2']['delta'].T.shape

In [None]:
torch.all(((1 + za) * anp_system.layer_extra_params['layer2.1.conv2']['delta'].T).T == 2 * anp_system.layer_extra_params['layer2.1.conv2']['delta'])

In [None]:
# torch.mul((1 + za), anp_system.layer_extra_params['layer2.1.conv2']['delta'], axis=0)

In [None]:
t_dim = len(anp_system.layer_extra_params['layer2.1.conv2']['delta'].shape)
n_dim = [anp_system.layer_extra_params['layer2.1.conv2']['delta'].shape[0],] + [1,] * (t_dim - 1)
zza = torch.ones(n_dim).cuda()

In [None]:
zza.shape

In [None]:
torch.all((1 + zza) * anp_system.layer_extra_params['layer2.1.conv2']['delta'] == 2 * anp_system.layer_extra_params['layer2.1.conv2']['delta'])

In [None]:
t_dim = len(anp_system.layer_extra_params['fc']['delta'].shape)
n_dim = [anp_system.layer_extra_params['fc']['delta'].shape[0],] + [1,] * (t_dim - 1)
gga = torch.ones(n_dim).cuda()

In [None]:
gga.shape

In [None]:
torch.all((1 + gga) * anp_system.layer_extra_params['fc']['delta'] == 2 * anp_system.layer_extra_params['fc']['delta'])

In [None]:
torch.__version__