In [1]:
from toolkit.commons import *
import torch
from torchattacks import CW
from torch import optim
import math
import csv
# torch.cuda.set_device(3)
from torchvision.models import EfficientNet_B0_Weights, efficientnet_b0
from torchvision.models import densenet121, DenseNet121_Weights
from torchvision.models import resnet50, ResNet50_Weights

## UCA

In [2]:
class CE(CW):
    """White-box confidence attack that reuses the optimisation scaffolding of ``CW``.

    For every sample the attack looks at the model's initial prediction:

    * if it equals the true label, the softmax probability of that class is
      pushed **down**;
    * otherwise the softmax probability of the (wrong) predicted class is
      pushed **up**.

    In both cases a candidate is only kept while the predicted class stays the
    same as the initial prediction, and an L2 penalty keeps the perturbation
    small.
    """

    def __init__(self, model, c=1, kappa=0, steps=50, lr=0.01, lamb=1):
        """Store the attack hyper-parameters.

        Args:
            model: network under attack (forwarded to ``CW.__init__``).
            c: weight of the confidence term relative to the L2 term.
            kappa: forwarded to ``CW``; not read by the methods of this class.
            steps: number of Adam steps per batch.
            lr: Adam learning rate.
            lamb: stored on the instance; not read by the methods of this class.
        """
        super(CE, self).__init__(model, c, kappa, steps, lr)
        self.c = c
        self.kappa = kappa
        self.steps = steps
        self.lr = lr
        self.lamb = lamb
        self.supported_mode = ['default', 'targeted']

    def attack_uncertainty(self, loader, return_type="loader"):
        """Attack every batch of ``loader`` and collect the results.

        Args:
            loader: iterable yielding ``(X, y)`` batches.
            return_type: ``"loader"`` wraps the adversarial images and the
                original labels with ``wrapper.to_loader`` (batch size 1);
                ``"tensor"`` returns the concatenated adversarial images.

        Returns:
            A loader, a tensor, or ``None`` for any other ``return_type``.
        """
        ces = []
        ys = []
        for i, (X, y) in enumerate(loader):
            if (i + 1) % 100 == 0:
                print(f"Processing {i+1}/{len(loader)}")
            # Calling the attack object runs the base-class __call__, which is
            # expected to dispatch to forward() below.
            ce_X = self(X, y).clone().detach()
            ces.append(ce_X)
            ys.append(y)
        if return_type == "loader":
            return wrapper.to_loader((torch.cat(ces), torch.cat(ys)), batch_size=1)
        elif return_type == "tensor":
            return torch.cat(ces)
        else:
            return None

    def h_loss(self, images, labels):
        """Return the softmax probability the model assigns to ``labels``.

        Not called by ``forward`` (which uses ``h_loss_2``); kept for callers
        that want the unsigned probability.
        """
        logits = self.get_logits(images)
        probs = torch.softmax(logits, dim=-1)

        indices = labels.view(-1, 1).to(torch.int64)
        fx_y = torch.gather(probs, 1, indices).squeeze()

        return fx_y

    def h_loss_2(self, images, init_labels, sign, logits=None):
        """Return ``sign * p(init_labels | images)`` per sample.

        Args:
            images: batch to evaluate (ignored when ``logits`` is given).
            init_labels: class indices whose probability is gathered.
            sign: per-sample multiplier; ``forward`` passes +1 for initially
                correct samples and -1 for initially wrong ones.
            logits: optional pre-computed logits for ``images``; passing them
                avoids a second forward pass through the model.
        """
        if logits is None:
            logits = self.get_logits(images)
        probs = torch.softmax(logits, dim=-1)
        indices = init_labels.view(-1, 1).to(torch.int64)
        fx_y = torch.gather(probs, 1, indices).squeeze()
        return sign * fx_y

    def forward(self, images, labels):
        """Craft adversarial images for one batch.

        Args:
            images: input batch.
            labels: true labels of ``images``.

        Returns:
            Tensor shaped like ``images``. Each sample is the most recent
            iterate that satisfied the acceptance mask, or the original image
            if no iterate did.
        """
        images = images.clone().detach().to(self.device)
        labels = labels.clone().detach().to(self.device)

        # The initial statistics are constants of the attack, so no autograd
        # graph is needed for them.
        with torch.no_grad():
            init_probs = torch.softmax(self.get_logits(images), dim=-1)
            init_confs, init_pred_labels = torch.max(init_probs, dim=-1)

        init_correct = init_pred_labels == labels
        # +1 where the model was right (minimising the loss lowers the
        # probability), -1 where it was wrong (minimising raises it).
        sign = 2 * init_correct - 1

        w = self.inverse_tanh_space(images).detach()
        w.requires_grad = True

        best_adv_images = images.clone().detach()
        dim = len(images.shape)

        optimizer = optim.Adam([w], lr=self.lr)

        for step in range(self.steps):
            # Get adversarial images
            adv_images = self.tanh_space(w)

            # Per-sample squared L2 distance to the clean images.
            current_L2 = (adv_images - images).flatten(start_dim=1).pow(2).sum(dim=1)
            L2_loss = current_L2.sum()

            # A single forward pass serves both the loss and the acceptance
            # test below: adv_images is not modified by optimizer.step().
            outputs = self.get_logits(adv_images)
            h_loss = self.h_loss_2(adv_images, init_pred_labels, sign, logits=outputs)
            cost = L2_loss + self.c * h_loss.sum()

            optimizer.zero_grad()
            cost.backward()
            optimizer.step()

            with torch.no_grad():
                probs = torch.softmax(outputs.detach(), dim=-1)
                confs, pred_labels = torch.max(probs, dim=-1)

                # Accept when (initially correct AND confidence dropped) or
                # (initially wrong AND confidence did not drop), and the
                # predicted class is unchanged.
                mask = 1 * (
                    (~torch.logical_xor(init_correct, (init_confs - confs) > 0))
                    & (pred_labels == init_pred_labels)
                )

                # Reshape to (batch, 1, 1, ...) so the mask broadcasts over images.
                mask = mask.view([-1] + [1] * (dim - 1))
                best_adv_images = mask * adv_images.detach() + (1 - mask) * best_adv_images

        return best_adv_images

## UCA_black

In [3]:
class CE_Black(CW):
    """Transfer-style variant of the confidence attack.

    Gradients come from ``model`` (the network handed to ``CW``), while the
    initial prediction/confidence and the per-step acceptance test are taken
    from ``model2``.
    """

    def __init__(self, model, model2, c=1, kappa=0, steps=50, lr=0.01, lamb=1):
        """Store the attack hyper-parameters.

        Args:
            model: network used for gradient computation (forwarded to ``CW``).
            model2: network whose predictions decide the attack direction and
                whether an iterate is accepted.
            c: weight of the confidence term relative to the L2 term.
            kappa: forwarded to ``CW``; not read by the methods of this class.
            steps: number of Adam steps per batch.
            lr: Adam learning rate.
            lamb: stored on the instance; not read by the methods of this class.
        """
        super(CE_Black, self).__init__(model, c, kappa, steps, lr)
        self.c = c
        self.kappa = kappa
        self.steps = steps
        self.lr = lr
        self.lamb = lamb
        self.supported_mode = ['default', 'targeted']
        self.model2 = model2

    def attack_uncertainty(self, loader, return_type="loader"):
        """Attack every batch of ``loader`` and collect the results.

        Args:
            loader: iterable yielding ``(X, y)`` batches.
            return_type: ``"loader"`` wraps the adversarial images and the
                original labels with ``wrapper.to_loader`` (batch size 8);
                ``"tensor"`` returns the concatenated adversarial images.

        Returns:
            A loader, a tensor, or ``None`` for any other ``return_type``.
        """
        ces = []
        ys = []
        for i, (X, y) in enumerate(loader):
            # The extra positional argument is expected to reach forward() as
            # its `model2` parameter via the base-class __call__.
            ce_X = self(X, y, self.model2).clone().detach()
            ces.append(ce_X)
            ys.append(y)
        if return_type == "loader":
            return wrapper.to_loader((torch.cat(ces), torch.cat(ys)), batch_size=8)
        elif return_type == "tensor":
            return torch.cat(ces)
        else:
            return None

    def h_loss(self, images, labels):
        """Return the softmax probability ``model`` assigns to ``labels``.

        Not called by ``forward`` (which uses ``h_loss_2``).
        """
        logits = self.get_logits(images)
        probs = torch.softmax(logits, dim=-1)

        indices = labels.view(-1, 1).to(torch.int64)
        fx_y = torch.gather(probs, 1, indices).squeeze()

        return fx_y

    def h_loss_2(self, images, init_labels, sign, logits=None):
        """Return ``sign * p(init_labels | images)`` per sample under ``model``.

        Args:
            images: batch to evaluate (ignored when ``logits`` is given).
            init_labels: class indices whose probability is gathered.
            sign: per-sample multiplier (+1 / -1 in ``forward``).
            logits: optional pre-computed logits for ``images``; passing them
                avoids a second forward pass through the model.
        """
        if logits is None:
            logits = self.get_logits(images)
        probs = torch.softmax(logits, dim=-1)
        indices = init_labels.view(-1, 1).to(torch.int64)
        fx_y = torch.gather(probs, 1, indices).squeeze()
        return sign * fx_y

    def forward(self, images, labels, model2):
        """Craft adversarial images for one batch.

        Args:
            images: input batch.
            labels: true labels of ``images``.
            model2: network queried for the initial prediction and for the
                per-step acceptance test.

        Returns:
            Tensor shaped like ``images``. Each sample is the most recent
            iterate that satisfied the acceptance mask, or the original image
            if no iterate did.
        """
        images = images.clone().detach().to(self.device)
        labels = labels.clone().detach().to(self.device)

        # NOTE(review): model2 is called on `images` directly, whereas
        # self.model is reached through get_logits(). If get_logits() applies
        # input normalisation, model2 sees differently-scaled inputs — confirm
        # this is intended.
        # model2 is only queried, never differentiated, so skip autograd.
        with torch.no_grad():
            init_probs = torch.softmax(model2(images), dim=-1)
            init_confs, init_pred_labels = torch.max(init_probs, dim=-1)

        init_correct = init_pred_labels == labels
        # +1 where model2 was right (minimising lowers the probability),
        # -1 where it was wrong (minimising raises it).
        sign = 2 * init_correct - 1

        w = self.inverse_tanh_space(images).detach()
        w.requires_grad = True

        best_adv_images = images.clone().detach()
        dim = len(images.shape)

        optimizer = optim.Adam([w], lr=self.lr)

        for step in range(self.steps):
            # Get adversarial images
            adv_images = self.tanh_space(w)

            # Per-sample squared L2 distance to the clean images.
            current_L2 = (adv_images - images).flatten(start_dim=1).pow(2).sum(dim=1)
            L2_loss = current_L2.sum()

            # One forward pass through the gradient model per step; its
            # predictions are not used for acceptance, only for the loss.
            surrogate_logits = self.get_logits(adv_images)
            h_loss = self.h_loss_2(
                adv_images, init_pred_labels, sign, logits=surrogate_logits
            )
            cost = L2_loss + self.c * h_loss.sum()

            optimizer.zero_grad()
            cost.backward()
            optimizer.step()

            with torch.no_grad():
                probs_2 = torch.softmax(model2(adv_images.detach()), dim=-1)
                confs_2, pred_labels_2 = torch.max(probs_2, dim=-1)

                # Accept when (initially correct AND model2's confidence
                # dropped) or (initially wrong AND it did not drop), and
                # model2's predicted class is unchanged.
                mask = 1 * (
                    (~torch.logical_xor(init_correct, (init_confs - confs_2) > 0))
                    & (pred_labels_2 == init_pred_labels)
                )

                # Reshape to (batch, 1, 1, ...) so the mask broadcasts over images.
                mask = mask.view([-1] + [1] * (dim - 1))
                best_adv_images = mask * adv_images.detach() + (1 - mask) * best_adv_images

        return best_adv_images

## ACE

In [4]:
def softmax_response(logits):
    """Return the softmax of ``logits`` taken over dimension 1."""
    return torch.nn.functional.softmax(logits, dim=1)

def attack_confidence_estimation(model, input, label, normalization, proxy=None, epsilon=0.05, epsilon_decay=0.5, max_iterations=15, confidence_score_function=softmax_response, device='cuda'):
    """Perturb a single input to distort the model's confidence without changing its prediction.

    One signed-gradient step is taken on the confidence score of the
    initially predicted class: downhill when the prediction equals ``label``
    (less confident when right), uphill otherwise (more confident when wrong).
    The step size starts at ``epsilon`` and is multiplied by ``epsilon_decay``
    each time the perturbed input would flip the prediction.

    Args:
        model: network under attack; moved to ``device``.
        input: a batch containing exactly one sample (the code reads
            ``output[0]`` and uses a global ``argmax``).
        label: true label of the sample.
        normalization: callable applied to the input before every model call.
        proxy: optional substitute network used for the gradient (black-box
            setting). ``None`` means the gradient comes from ``model`` itself.
        epsilon: initial step size.
        epsilon_decay: multiplicative shrink factor applied after a failed try.
        max_iterations: maximum number of step sizes to try.
        confidence_score_function: maps logits to confidence scores.
        device: device on which everything is evaluated.

    Returns:
        The perturbed input clamped to ``[0, 1]``, or ``input`` (moved to
        ``device``) if no tried step size preserved the prediction.
    """
    input = input.to(device)
    label = label.to(device)
    model = model.to(device)
    # Differentiate with respect to a detached copy: with an identity
    # normalization, `normalization(input)` is `input` itself, and enabling
    # gradients on it would silently mutate the caller's tensor.
    data = normalization(input).clone().detach().requires_grad_(True)
    # `is not None` rather than truthiness: container modules define __len__,
    # so an empty one would otherwise be treated as "no proxy".
    if proxy is not None:
        # Black-box setting, use proxy to calculate the gradients
        proxy = proxy.to(device)
        output = proxy(data)
        with torch.no_grad():
            model_output = model(normalization(input))
    else:
        # White-box setting, use model itself to calculate the gradients
        output = model(data)
        model_output = output
    init_prediction = model_output.argmax()
    scores = confidence_score_function(output)
    # Gradient of the predicted-class score w.r.t. the input only; unlike
    # backward(), this leaves the networks' parameter gradients untouched.
    jacobian = torch.autograd.grad(scores[0][init_prediction.item()], data)[0]
    if init_prediction == label:
        # If the model is correct, we wish to make it less confident of its prediction
        attack_direction = -1
    else:
        # Otherwise, we wish to make it more confident of its misprediction
        attack_direction = 1
    # The direction does not change between tries; only the step size does.
    step_direction = jacobian.sign() * attack_direction
    with torch.no_grad():
        for _ in range(max_iterations):
            perturbed_image = torch.clamp(input + epsilon * step_direction, 0, 1)
            new_output = model(normalization(perturbed_image))
            if new_output.argmax() == init_prediction:
                # This adversarial example does not change the prediction as required, return it
                return perturbed_image
            epsilon = epsilon * epsilon_decay
    # The attack has failed; either the epsilon was too large, epsilon_decay too small,
    # or max_iterations was insufficient. Return original input.
    return input

def identity_transform(x):
    """No-op transform: hand back the argument unchanged.

    Serves as the default ``normalization`` of ``ACE``.
    """
    return x

def ACE(model, loader, device, epsilon=0.05, epsilon_decay=0.5, max_iterations=15, confidence_score_function=softmax_response, normalization=identity_transform):
    """Run ``attack_confidence_estimation`` on every sample of ``loader``.

    Args:
        model: network under attack; switched to eval mode.
        loader: iterable yielding ``(images, labels)`` batches.
        device: device on which the batches and the attack are evaluated.
        epsilon: initial step size forwarded to the attack.
        epsilon_decay: step-size shrink factor forwarded to the attack.
        max_iterations: maximum number of step sizes tried per sample.
        confidence_score_function: maps logits to confidence scores.
        normalization: callable applied to inputs before each model call.

    Returns:
        The result of ``wrapper.to_loader`` over the adversarial samples and
        the original labels, with batch size 1.
    """
    model.eval()
    adversarial_samples = []
    labely = []
    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)
        # The attack handles one sample at a time, so feed it batches of one.
        adversarial_sample = [
            attack_confidence_estimation(
                model=model,
                input=images[i].unsqueeze(0),
                label=labels[i],
                normalization=normalization,
                epsilon=epsilon,
                epsilon_decay=epsilon_decay,
                max_iterations=max_iterations,
                confidence_score_function=confidence_score_function,
                device=device,
            ).detach()
            for i in range(images.size(0))
        ]
        adversarial_samples.append(torch.cat(adversarial_sample, dim=0))
        labely.append(labels)
    ACE_loader = wrapper.to_loader((torch.cat(adversarial_samples), torch.cat(labely)), batch_size=1)
    return ACE_loader

In [2]:
from method import *

In [4]:
from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights
from torchvision.models import efficientnet_v2_l, EfficientNet_V2_L_Weights

In [5]:

# Build the networks and the dataset for the selected experiment setting.
if setting_name == "ImageNet":
    # Pass the pretrained weights by keyword; passing them positionally is
    # deprecated in torchvision's model builders.
    efficient = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
    densenet = densenet121(weights=DenseNet121_Weights.IMAGENET1K_V1)
    weights = ResNet50_Weights.DEFAULT
    resnet = resnet50(weights=weights)
    dataset = get_dataset()
    # NOTE(review): this branch does not define `model` / `black_model`, which
    # the attack cells below read. Assign them from the networks above before
    # running those cells in the ImageNet setting.
else:
    dataset = get_dataset()
    model = get_network()
    black_model = get_network()

Files already downloaded and verified
Files already downloaded and verified


In [18]:
# White-box CE attack over the whole test set.
ce = CE(model, c=1, steps=50, lr=0.1, lamb=10)
# Tell the attack which mean/std the inputs were normalised with.
ce.set_normalization_used(
    *(
        (dataset.normalize.mean, dataset.normalize.std)
        if setting_name == "ImageNet"
        else MEAN_STDs[setting_name]
    )
)
# NOTE(review): GPU index 3 is hard-coded here while the other attack cells
# use plain "cuda" — confirm this matches the device the model lives on.
ce.set_device("cuda:3")
CE_loader = ce.attack_uncertainty(dataset.test_loader)

In [None]:
# ACE baseline over the whole test set, with the attack's default settings.
ACE_loader = ACE(model=model, loader=dataset.test_loader, device="cuda")

In [None]:
# Transfer attack: gradients from `black_model`, acceptance decided by `model`.
ce_black = CE_Black(black_model, model, c=0.4, steps=50, lr=0.1, lamb=10)
# Tell the attack which mean/std the inputs were normalised with.
ce_black.set_normalization_used(
    *(
        (dataset.normalize.mean, dataset.normalize.std)
        if setting_name == "ImageNet"
        else MEAN_STDs[setting_name]
    )
)
ce_black.set_device("cuda")
CE_black_loader_kd1 = ce_black.attack_uncertainty(dataset.test_loader)