In [1]:
# Base name of the checkpoint under inspection; a later cell loads f'../model/{MODELNAME}.pt'.
MODELNAME='cifar10_backdoored_1' 
# Dataset/architecture key used to index the per-dataset lookup tables below
# (model class, trigger size, channel count, ...). 'MNIST' is the author's noted alternative.
MODELCLASS='CIFAR10' #'MNIST'

In [2]:
import torch
from torch import nn
from torch import linalg as LA
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import random
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from torch import Tensor
#from typing import Any
%matplotlib inline
import sys
sys.path.append('../')
import models.definitions.CIFAR100Net as CIFAR100
import models.definitions.CIFAR10Net as CIFAR10
import models.definitions.MNISTNet as MNIST

# Human-readable class names, indexed by integer label, for each supported dataset.
class_names_MNIST=['0','1','2','3','4','5','6','7','8','9']
class_names_CIFAR10 = ['airplane', 'automobile', 'bird', 'cat', 'deer','dog', 'frog', 'horse', 'ship', 'truck']
# CIFAR-100 fine labels in label-index order (alphabetical), so that
# class_names_CIFAR100[label] is the name of integer label `label`.
# The previous table was a copy of the superclass grouping: it was not in label
# order and listed 'train'/'truck' where 'willow'/'train' belong.
class_names_CIFAR100 = [
    'apple', 'aquarium_fish', 'baby', 'bear', 'beaver',
    'bed', 'bee', 'beetle', 'bicycle', 'bottle',
    'bowl', 'boy', 'bridge', 'bus', 'butterfly',
    'camel', 'can', 'castle', 'caterpillar', 'cattle',
    'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach',
    'couch', 'crab', 'crocodile', 'cup', 'dinosaur',
    'dolphin', 'elephant', 'flatfish', 'forest', 'fox',
    'girl', 'hamster', 'house', 'kangaroo', 'keyboard',
    'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard',
    'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain',
    'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid',
    'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree',
    'plain', 'plate', 'poppy', 'porcupine', 'possum',
    'rabbit', 'raccoon', 'ray', 'road', 'rocket',
    'rose', 'sea', 'seal', 'shark', 'shrew',
    'skunk', 'skyscraper', 'snail', 'snake', 'spider',
    'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'table',
    'tank', 'telephone', 'television', 'tiger', 'tractor',
    'train', 'trout', 'tulip', 'turtle', 'wardrobe',
    'whale', 'willow_tree', 'wolf', 'woman', 'worm',
]


# Per-dataset lookup tables; every table is keyed by the MODELCLASS string.
model_map = dict(CIFAR10=CIFAR10, CIFAR100=CIFAR100, MNIST=MNIST)      # model definition module/class
triggersize_map = dict(CIFAR10=32, CIFAR100=32, MNIST=28)              # trigger side length
dim_map = dict(CIFAR10=3, CIFAR100=3, MNIST=1)                         # leading (channel) dimension of the trigger
trigger_type_map = dict(CIFAR10=[1, 2], CIFAR100=[1, 2], MNIST=[2])
class_names_map = dict(
    CIFAR10=class_names_CIFAR10,
    CIFAR100=class_names_CIFAR100,
    MNIST=class_names_MNIST,
)
epochs_map = dict(CIFAR10=4, CIFAR100=3, MNIST=2)                      # trigger-generation epochs

# Use the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Defining L norms

def l1_norm(x: Tensor, y: Tensor=0) -> Tensor:
    """Return the sum of absolute element-wise differences between ``x`` and ``y``.

    With the default ``y=0`` this is the L1 norm of ``x`` itself.
    """
    return (x - y).abs().sum()

def l2_norm(x, y=0):
    """Return the Euclidean distance between ``x`` and ``y``.

    With the default ``y=0`` this is the L2 norm of ``x`` itself.
    """
    squared_diff = (x - y) ** 2
    return torch.sqrt(torch.sum(squared_diff))

def linf_norm(x, y=0):
    """Return the largest absolute element-wise difference between ``x`` and ``y``.

    With the default ``y=0`` this is the L-infinity norm of ``x`` itself.
    """
    return (x - y).abs().max()

In [4]:
def save_model(model, name):
    """Serialize only the parameters/buffers (``state_dict``) of ``model`` to ``name``."""
    weights = model.state_dict()
    torch.save(weights, name)
def load_model(model_class, name):
    """Instantiate ``model_class`` and restore its weights from a saved ``state_dict``.

    Args:
        model_class: zero-argument callable returning a fresh ``nn.Module``.
        name: path (or file-like object) of a checkpoint written with
            ``torch.save(model.state_dict(), ...)``.

    Returns:
        The model with the checkpoint weights loaded. It is left on the CPU;
        the caller moves it to the target device.
    """
    model = model_class()
    # map_location='cpu' lets a checkpoint that was saved from a GPU be opened on a
    # CPU-only machine; the freshly built model lives on the CPU at this point anyway.
    state_dict = torch.load(name, map_location='cpu')
    model.load_state_dict(state_dict)
    return model
def generate_trigger(model, dataloader, delta_0, loss_fn, optimizer, device, bdtype, step_size=None):
    """Run one pass over ``dataloader`` refining an additive trigger by gradient descent.

    For every batch the trigger is added to the inputs, the classification loss plus
    a sparsity regulariser is computed, and the trigger is updated with a plain
    gradient step. Only the trigger is differentiated; model weights are not updated.

    Args:
        model: network under inspection.
        dataloader: yields ``(x, y)`` batches; must expose ``.dataset`` (used for the
            progress print only).
        delta_0: trigger produced by the previous pass. It is copied, not modified.
        loss_fn: classification loss called as ``loss_fn(pred, y)``.
        optimizer: unused for the update itself; if given, its ``zero_grad()`` is
            still called each batch as before. May be ``None``.
        device: device the batches are moved to.
        bdtype: ``'MNIST'`` selects the single-term L1 penalty; any other value
            selects the penalty that indexes the first three slices of ``delta``
            along its leading axis.
        step_size: gradient step. Defaults to the module-level ``lr`` so existing
            calls behave exactly as before.

    Returns:
        The updated trigger, a leaf tensor with ``requires_grad=True``.
    """
    if step_size is None:
        # Backward-compatible fallback: the original implementation read the
        # notebook-level `lr` directly.
        step_size = lr
    size = len(dataloader.dataset)
    # NOTE(review): train mode is kept from the original. If the model contains
    # BatchNorm/Dropout layers this alters its statistics/behaviour while the
    # trigger is being searched - confirm whether eval() was intended.
    model.train()
    delta = delta_0.detach().clone().requires_grad_()
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)
        x_stamped = torch.add(x, delta)  # from here delta is part of the graph
        pred = model(x_stamped)
        if bdtype == 'MNIST':
            # NOTE(review): the (delta<0) count is a comparison result, so it adds to
            # the reported loss value but contributes no gradient to delta.
            loss = loss_fn(pred, y) + l1_norm(delta) + (delta < 0).type(torch.float32).sum()
        else:
            # L1 on slice 0 plus pairwise L1 differences between slices 0, 1 and 2.
            loss = (loss_fn(pred, y)
                    + l1_norm(delta[0, :, :])
                    + l1_norm(delta[0, :, :] - delta[1, :, :])
                    + l1_norm(delta[0, :, :] - delta[2, :, :])
                    + l1_norm(delta[1, :, :] - delta[2, :, :]))

        if optimizer is not None:
            optimizer.zero_grad()
        # Accumulate gradients into delta only.
        loss.backward(inputs=delta)
        # Manual gradient step; the result is a fresh leaf for the next batch.
        delta = (delta.detach() - delta.grad * step_size).requires_grad_()
        if batch % 100 == 0:
            print('loss: {:.4f} [{}/{}]'.format(loss.item(), batch * len(x), size))
    return delta
def test_trigger(model, dataloader,delta, loss_fn, device):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.to(device)
    model.eval()
    loss, correct = 0.0, 0    
    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            x_stamped=torch.add(x,delta)
            pred = model(x_stamped)
            loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.int).sum().item()
    
    loss /= num_batches
    correct /= size
    print('Test Result: Accuracy @ {:.2f}%, Avg loss @ {:.4f}\n'.format(100 * correct, loss))
    return correct

## The number of epochs has been fine-tuned for CIFAR-10 and CIFAR-100

In [5]:

# --- Run configuration for the selected MODELCLASS ---------------------------
TriggerSize = triggersize_map[MODELCLASS]
num_of_epochs = epochs_map[MODELCLASS]

# Step size read by generate_trigger for its manual gradient update on the trigger.
lr = 0.01

# Input pipeline: images are only converted to tensors.
transform = transforms.ToTensor()
train_kwargs = dict(batch_size=100, shuffle=True)
test_kwargs = dict(batch_size=1000)

# Restore the model under test and place it on the compute device.
testmodel = load_model(model_map[MODELCLASS], f'../model/{MODELNAME}.pt').to(device)

# Passed to generate_trigger only to satisfy its signature; it never steps.
optimizer = optim.Adam(testmodel.parameters(), lr=0.1) # not using optimizer here



Running trigger generation for the first 10 classes only.

In [6]:
# to map dataset, add this block if need to re-download a fresh dataset
# Dataset constructors keyed by MODELCLASS. Re-run this cell to obtain fresh dataset
# objects after a later cell has overwritten their labels.
_DATASET_CLASSES = {
    'CIFAR10': torchvision.datasets.CIFAR10,
    'CIFAR100': torchvision.datasets.CIFAR100,
    'MNIST': torchvision.datasets.MNIST,
}

# Training splits (downloaded into ./data when missing).
trainset_map = {
    key: dataset_cls(root='./data', train=True, download=True, transform=transform)
    for key, dataset_cls in _DATASET_CLASSES.items()
}
# Held-out test splits.
testset_map = {
    key: dataset_cls(root='./data', train=False, download=True, transform=transform)
    for key, dataset_cls in _DATASET_CLASSES.items()
}

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


#### Set to run for the first 10 classes only

In [7]:

# Candidate target labels to reverse-engineer a trigger for.
CLASSES=[i for i in range(10)]  # change to selected classes for CIFAR100 !!!

# Results keyed by target label: final trigger tensor and its last measured accuracy.
triggers1={}
acc1={}
for TARGET in CLASSES:
    # Every target starts from a constant 0.5 trigger of shape
    # (dim_map[MODELCLASS], TriggerSize, TriggerSize) on the compute device.
    delta =torch.zeros([dim_map[MODELCLASS],TriggerSize,TriggerSize], requires_grad=True, device=device)+0.5
    trainset = trainset_map[MODELCLASS]
    testset = testset_map[MODELCLASS]

    # Overwrite every label with TARGET, so the loss/accuracy below measure how often
    # stamped inputs are classified as TARGET.
    # NOTE: this mutates the dataset objects held in trainset_map/testset_map in
    # place; the original labels are gone until the dataset cell is re-run.
    for i in range(len(trainset)):
        trainset.targets[i]=TARGET  
    for i in range(len(testset)):
        testset.targets[i]=TARGET  
    
    trigger_gen_loader = DataLoader(trainset, **train_kwargs)
    trigger_test_loader = DataLoader(testset, **test_kwargs)
    
    # Each epoch is one full pass of trigger refinement followed by an evaluation
    # on the relabelled test split.
    for epoch in range(num_of_epochs):
        print(f'With target number {TARGET}:' )
        delta=generate_trigger(testmodel, trigger_gen_loader, delta , nn.CrossEntropyLoss(), optimizer, device, bdtype=MODELCLASS)
        test_acc=test_trigger(testmodel, trigger_test_loader,delta, nn.CrossEntropyLoss(), device)
    # Keep only the trigger and accuracy from the last epoch for this target.
    triggers1[TARGET]=delta
    acc1[TARGET]=test_acc

With target number 0:
loss: 527.4644 [0/50000]
loss: 219.1471 [10000/50000]
loss: 84.9402 [20000/50000]
loss: 80.3732 [30000/50000]
loss: 76.4586 [40000/50000]
Test Result: Accuracy @ 73.99%, Avg loss @ 3.0941

With target number 0:
loss: 76.5868 [0/50000]
loss: 75.8525 [10000/50000]
loss: 73.5857 [20000/50000]
loss: 74.3367 [30000/50000]
loss: 74.4024 [40000/50000]
Test Result: Accuracy @ 79.90%, Avg loss @ 2.1954

With target number 0:
loss: 74.1564 [0/50000]
loss: 74.8852 [10000/50000]
loss: 74.5680 [20000/50000]
loss: 72.9805 [30000/50000]
loss: 73.5495 [40000/50000]
Test Result: Accuracy @ 78.89%, Avg loss @ 2.2070

With target number 0:
loss: 72.1819 [0/50000]
loss: 75.5028 [10000/50000]
loss: 75.3051 [20000/50000]
loss: 74.9023 [30000/50000]
loss: 73.2685 [40000/50000]
Test Result: Accuracy @ 79.43%, Avg loss @ 2.1435

With target number 1:
loss: 535.2415 [0/50000]
loss: 216.8242 [10000/50000]
loss: 80.6834 [20000/50000]
loss: 79.0086 [30000/50000]
loss: 75.9909 [40000/50000]
Te

loss: 74.5207 [10000/50000]
loss: 76.8596 [20000/50000]
loss: 75.3383 [30000/50000]
loss: 74.7032 [40000/50000]
Test Result: Accuracy @ 80.57%, Avg loss @ 2.2189



In [8]:
# One fixed-width row per statistic, one column per class 0..9:
# L1, L2 and L-inf norm of each trigger.
for norm_fn in (l1_norm, l2_norm, linf_norm):
    row = "".join("{:10.2f}".format(norm_fn(triggers1[i]).item()) for i in range(10))
    print(row)

# Number of trigger entries whose magnitude exceeds 0.05.
pixel_counts = [str((abs(triggers1[i])>0.05).sum().item()) for i in range(10)]
print("      "+"      ".join(pixel_counts))

# Accuracy recorded for each target class.
print("".join("{:10.4f}".format(accuracy) for accuracy in acc1.values()))

def MAD_anomaly_index(X): #X is a list of numbers (L1, L2 etc)
    """Score each value by its deviation from the median in units of the MAD.

    Args:
        X: sequence of numbers (e.g. per-class trigger norms).

    Returns:
        ``(degree_of_anomaly, n_low)`` where ``degree_of_anomaly`` is a float array
        of ``(x - median) / MAD`` and ``n_low`` counts the entries below -2.

    When the MAD is zero (more than half of the values are identical) values equal
    to the median score 0 and all others score +/-inf, instead of producing NaNs
    and divide-by-zero warnings. An empty input yields an empty array and 0.
    """
    values = np.asarray(X, dtype=float)
    if values.size == 0:
        return values, 0
    devs = values - np.median(values)
    MAD = np.median(np.abs(devs))
    if MAD == 0:
        # Avoid 0/0 -> NaN: on-median values are not anomalous, the rest are infinitely so.
        degree_of_anomaly = np.where(devs == 0, 0.0, np.copysign(np.inf, devs))
    else:
        degree_of_anomaly = devs / MAD
    return degree_of_anomaly, (degree_of_anomaly < -2).sum()

# Per-class L1 norms of the recovered triggers.
L = [l1_norm(triggers1[i]).item() for i in range(10)]
linf_values = [linf_norm(triggers1[i]).item() for i in range(10)]

# Boolean masks: unusually small trigger norm (< -2 MADs) or unusually high accuracy (> 2 MADs).
L1 = MAD_anomaly_index(L)[0] < -2
Linf = MAD_anomaly_index(linf_values)[0] < -2
L_acc = MAD_anomaly_index(list(acc1.values()))[0] > 2

print("Infected Classes:")
# Classes flagged by the L1 criterion.
print([idx for idx in range(10) if L1[idx]])
# Classes flagged by the accuracy criterion, restricted to accuracy above 50%.
print([idx for idx in range(10) if (L_acc[idx] and acc1[idx]>0.5)])

     59.65     61.74     61.43     59.46     64.77     54.51     66.10     67.22     74.32     66.43
      6.22      5.39      5.60      6.04      4.18      5.61      4.94      3.45      5.31      4.18
      2.57      2.11      2.14      2.87      1.32      3.15      1.65      0.68      1.54      1.30
      15      33      30      26      62      15      43      84      67      59
    0.7943    0.8564    0.8479    0.8431    0.8090    0.8397    0.8197    0.7806    0.7712    0.8057
Infected Classes:
[5]
[]


In [None]:
# to return and display trigger

In [10]:
#folder_clean="../trigger_clean"
# folder_bd="../trigger_bd"

# for i in range(10):
#     folder=  folder_bd
#     strnow=datetime.now().strftime("%X%f").replace(":","-")
#     #torch.save(triggers1[i],f"{folder}/{MODELCLASS}_type1_class_{i}_{strnow}.pt")
#     torch.save(triggers1[i],f"{folder}/{MODELCLASS}_class_{i}.pt")

   