In [None]:
# metrics
# [clean accuracy, robust accuracy, top-k mask overlap under perturbation]

# vit model
# deit, vit

# dataset
# cifar10, lsun_bedroom, lsun_cat, lsun_church

# threat method

In [73]:
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import requests

# Sanity check: classify one CIFAR-10 sample image with the pretrained ViT.
url = 'https://www.cs.toronto.edu/~kriz/cifar-10-sample/dog10.png'
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting PIL fail
# later while trying to decode an error page as an image.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Normalise to three channels so a palette/alpha PNG cannot change the input layout.
image = Image.open(response.raw).convert('RGB')
feature_extractor = ViTFeatureExtractor.from_pretrained('nateraw/vit-base-patch16-224-cifar10')
model = ViTForImageClassification.from_pretrained('nateraw/vit-base-patch16-224-cifar10')
inputs = feature_extractor(images=image, return_tensors="pt")
outputs = model(**inputs)
# Index of the highest logit for each image in the batch.
preds = outputs.logits.argmax(dim=1)

classes = [
    'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'
]
# Last expression of the cell: shows the predicted label for the single image.
classes[preds[0]]

'dog'

In [98]:
# Bare expression: the notebook renders the module tree of the loaded classifier.
model

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0): ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=768, out_

In [92]:
import torchvision
from torchvision import transforms
import torch
# Alternative backbone, currently disabled:
# model = torch.hub.load('facebookresearch/deit:main',
#                        'deit_tiny_patch16_224', pretrained=True)

# Optional normalisation step, currently disabled:
# normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

# Preprocessing applied to every test image: resize, centre-crop to 224,
# then convert to a tensor. No normalisation is applied at this stage.
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # normalize,
]
transform = transforms.Compose(preprocessing_steps)

# CIFAR-10 test split, served in fixed order in batches of 100.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=100,
    shuffle=False,
    num_workers=8,
)

# Short label names, indexed by class id.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')



Files already downloaded and verified


In [93]:
# Prefer the GPU this notebook was developed on (index 2). Fall back to the
# default CUDA device, or to the CPU, so the notebook still runs on machines
# with fewer than three GPUs or without CUDA.
if torch.cuda.is_available():
    device = 'cuda:2' if torch.cuda.device_count() > 2 else 'cuda'
else:
    device = 'cpu'
# Move the classifier to the selected device; `net` is the handle used by the evaluation code.
net = model.to(device)

In [94]:
from ut import *

In [95]:
import numpy as np

# Re-download the sample image and turn it into a channel-first tensor.
url = 'https://www.cs.toronto.edu/~kriz/cifar-10-sample/dog10.png'
# Bound the wait and fail on HTTP errors before PIL tries to decode the body.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
# Converting to RGB guarantees a 3-axis array with three channels,
# which the permute below relies on.
image = Image.open(response.raw).convert('RGB')
# Move the channel axis to the front.
image = torch.tensor(np.array(image)).permute(2, 0, 1)
# Scratch checks kept from exploration:
# image.shape
# inputs = feature_extractor(images=image, return_tensors="pt")
# inputs
# feas = feature_extractor(images=[torch.tensor(np.array(image)),torch.tensor(np.array(image))], return_tensors="pt")
# feas['pixel_values'].shape


In [96]:

# Loss is CE
from torch import nn
criterion = nn.CrossEntropyLoss()

##### Validation
def test():
    """Evaluate ``net`` on ``testloader`` and report loss and top-1 accuracy.

    Every batch is split into per-image tensors, run through
    ``feature_extractor`` and fed to the network in eval mode with gradients
    disabled. A progress line with the running mean loss and running accuracy
    is printed after each batch.

    Returns:
        tuple: ``(test_loss, acc)``. ``test_loss`` is the SUM of the per-batch
        criterion values (not the mean shown in the progress line), and
        ``acc`` is the percentage of correctly classified samples.
    """
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # The feature extractor is given a list of single images rather
            # than one batched tensor.
            # NOTE(review): the loader yields float tensors produced by
            # ToTensor; confirm the installed feature extractor version does
            # not rescale them a second time.
            inputs = [inputs[i] for i in range(inputs.shape[0])]
            inputs = feature_extractor(images=inputs, return_tensors="pt")
            inputs, targets = inputs.to(device), targets.to(device)
            # Run the forward pass through `net`, the same handle that was
            # switched to eval mode above, so the two can never diverge if
            # either global name is rebound.
            outputs = net(**inputs)
            outputs = outputs.logits

            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

    # Accuracy in percent over all evaluated samples.
    acc = 100.*correct/total
    return test_loss, acc

In [100]:
# feature_extractor

In [97]:
# Evaluate on the unmodified test set; the cell displays (summed batch loss, accuracy in %).
test()



(6.6333331894129515, 98.3)

In [None]:
# clean case

# get model
# get dataset
# loop over dataset
# get metrics: clean accuracy

In [None]:
# poisoning case without defense

# get model
# get dataset  
# loop over dataset
# Baseline pass before any attack is applied. The shared evaluation routine is
# reused instead of repeating its loop in this cell: test() switches the
# network to eval mode, runs with gradients disabled, and returns
# (summed per-batch loss, accuracy in %).
test_loss, acc = test()
# get the original attention mask
# poison the data with Adversarial Attack
# get the attention mask after poisoning
# get the metrics: robust accuracy, top-k mask overlap under perturbation


In [None]:
# poisoning case with defense

# get model
# get dataset  
# loop over dataset
# for data inside the poison set
# get the original attention mask
# poison the data with Adversarial Attack with epsilon
# 1. denoise poisoned image with pretrained diffusion model with epsilon
# 2. conduct randomized smoothing on the denoised image with delta of R computed using (2*delta ?)
# get the attention mask after poisoning
# get the metrics: robust accuracy, top-k mask overlap under perturbation
