<a href="https://colab.research.google.com/github/koconno8/CS436FinalProject/blob/main/FinalProject436.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import getopt
import logging
import os
import random
import sys
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

In [None]:
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (default 1).

    Returns:
        The configured ``nn.Conv2d`` module.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Each convolution is followed by batch norm and ReLU; the shortcut
    branch is added after the second ReLU and a final ReLU is applied.
    """

    # Output channel count is ``expansion * planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """Create the two conv/bn pairs and the shortcut branch.

        Args:
            in_planes: channels of the incoming feature map.
            planes: channels produced by both convolutions.
            stride: stride of the first convolution (and of the
                projection shortcut, when one is needed).
        """
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)

        # A 1x1 projection is only required when the spatial size or the
        # channel count changes; otherwise the shortcut stays empty.
        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the block on ``x`` and return the activated sum with the shortcut."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        # An empty nn.Sequential has length 0, i.e. the identity shortcut.
        if len(self.shortcut) > 0:
            residual = self.shortcut(x)
        else:
            residual = x
        hidden += residual
        return F.relu(hidden)


class Bottleneck(nn.Module):
    """Residual block with a 1x1 -> 3x3 -> 1x1 convolution stack.

    The last 1x1 convolution widens the output to ``expansion * planes``
    channels before the shortcut branch is added.
    """

    # Output channel count is ``expansion * planes``.
    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        """Create the three conv/bn pairs and the shortcut branch.

        Args:
            in_planes: channels of the incoming feature map.
            planes: channels of the two inner convolutions.
            stride: stride of the 3x3 convolution (and of the projection
                shortcut, when one is needed).
        """
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # A 1x1 projection is only required when the spatial size or the
        # channel count changes; otherwise the shortcut stays empty.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the block on ``x`` and return the activated sum with the shortcut."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = F.relu(self.bn2(self.conv2(hidden)))
        hidden = self.bn3(self.conv3(hidden))
        # An empty nn.Sequential has length 0, i.e. the identity shortcut.
        if len(self.shortcut) > 0:
            residual = self.shortcut(x)
        else:
            residual = x
        hidden += residual
        return F.relu(hidden)


class ResNet(nn.Module):
    """ResNet with a 3x3 stem, four residual stages and a linear classifier."""

    def __init__(self, block, num_blocks, num_classes=10):
        """Assemble the network.

        Args:
            block: residual block class; must expose ``expansion`` and be
                constructible as ``block(in_planes, planes, stride)``.
            num_blocks: number of blocks in each of the four stages.
            num_classes: size of the classifier output (default 10).
        """
        super().__init__()
        # Running channel count, updated by _make_layer as stages are built.
        self.in_planes = 64

        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``."""
        stage = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the classifier output for the input batch ``x``."""
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        # Fixed 4x4 pooling window, then flatten per sample for the classifier.
        pooled = F.avg_pool2d(feats, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)


def ResNet18():
    """Return a ResNet built from BasicBlock with stage depths [2, 2, 2, 2]."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(BasicBlock, stage_depths)

def ResNet34():
    """Return a ResNet built from BasicBlock with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(BasicBlock, stage_depths)

def ResNet50():
    """Return a ResNet built from Bottleneck with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(Bottleneck, stage_depths)

def ResNet101():
    """Return a ResNet built from Bottleneck with stage depths [3, 4, 23, 3]."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(Bottleneck, stage_depths)

def ResNet152():
    """Return a ResNet built from Bottleneck with stage depths [3, 8, 36, 3]."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(Bottleneck, stage_depths)

def test_resnet():
    """Smoke test: push one random 3x32x32 image through ResNet50 and print the output size."""
    model = ResNet50()
    dummy_batch = Variable(torch.randn(1, 3, 32, 32))
    logits = model(dummy_batch)
    print(logits.size())

In [None]:
# Create the output directories used by this script. exist_ok avoids the
# check-then-create race of a separate isdir() test.
for _dir in ('./results', './log', './models', './data', './runs'):
    os.makedirs(_dir, exist_ok=True)

#READ ARGUMENTS
# Options are expected as "-NAME value" pairs: odd positions are names,
# even positions are values.
opts = sys.argv[1::2]
args = sys.argv[2::2]

#Defaults
# EXP_NAME needs a default: without one the SummaryWriter / log / model paths
# below raise NameError whenever -EXP_NAME is not passed (e.g. when the
# process is launched by a notebook kernel with its own argv).
EXP_NAME = 'default_exp'
l_ce = 1.0
l2_reg = 10.0
mul = 4.0
TRAIN_BATCH_SIZE = 64
Feps = 8.0
B_val = 4.0
MAX_EPOCHS = 100
lr_factor = 10.0

# zip() stops at the shorter list, so a trailing option without a value is
# ignored instead of raising IndexError. Unknown options are skipped.
for opt, arg in zip(opts, args):
    #Experiment name
    if opt=='-EXP_NAME':
        EXP_NAME = str(arg)
        print('EXP_NAME:',EXP_NAME)
    if opt=='-MAX_EPOCHS':
        MAX_EPOCHS = int(arg)
        print('MAX_EPOCHS:',MAX_EPOCHS)
    if opt=='-l_ce':
        l_ce = float(arg)
        print('l_ce:',l_ce)
    if opt=='-B_val':
        B_val = float(arg)
        print('Initial Noise Magnitude:',B_val)
    if opt=='-l2_reg':
        l2_reg = float(arg)
        print('l2_reg:',l2_reg)
    if opt=='-b_size':
        TRAIN_BATCH_SIZE = int(arg)
        print('Training Batch Size:',TRAIN_BATCH_SIZE)
    if opt=='-Feps':
        Feps = float(arg)
        print('RFGSM Epsilon:',Feps)
    if opt=='-lr_factor':
        lr_factor = float(arg)
        print('lr_factor:',lr_factor)
    if opt=='-mul':
        mul = float(arg)
        print('Step Mult. factor:',mul)

# Derived from the final experiment name so it is always defined.
LOG_FILE_NAME = 'log/'+EXP_NAME+'.txt'

writer = SummaryWriter(log_dir="runs/"+EXP_NAME)

###################################### Function Definitions #######################################

def Guided_Attack(model, loss, image, target, eps=8/255, bounds=(0,1), steps=1, P_out=None, l2_reg=10, alt=1):
    """Craft adversarial images with signed-gradient ascent on a guided loss.

    Each step maximises ``loss(model(img), target)`` plus
    ``alt * l2_reg`` times the mean (over dim 0) of the squared difference
    between ``P_out`` and the softmax (dim=1) of the current output.

    Args:
        model: callable network; inputs are moved to the device of its
            first parameter (falls back to ``image``'s device).
        loss: callable ``loss(output, target)`` returning a scalar.
        image: starting images (assumed batch-first — TODO confirm shape
            against caller).
        target: labels passed to ``loss``.
        eps: total step budget; every step moves by ``eps / steps``.
        bounds: ``(low, high)`` values the result is clamped to.
        steps: number of ascent steps.
        P_out: reference probabilities for the squared-difference term.
            It is treated as a constant (detached). When ``None`` or an
            empty sequence the term is skipped.
        l2_reg: weight of the squared-difference term.
        alt: extra multiplier on the squared-difference term.

    Returns:
        Tensor of perturbed images, clamped to ``bounds``, without gradient.
    """
    first_param = next(iter(model.parameters()), None)
    device = image.device if first_param is None else first_param.device

    tar = target.to(device)
    img = image.detach().to(device)

    has_ref = P_out is not None and not (isinstance(P_out, (list, tuple)) and len(P_out) == 0)
    if has_ref:
        # The reference is a constant for the attack; detaching avoids
        # back-propagating through the graph that produced it.
        P_ref = P_out.detach().to(device)

    eps = eps/steps
    for _ in range(steps):
        img = img.detach().requires_grad_(True)
        out = model(img)
        cost = loss(out, tar)
        if has_ref:
            R_out = torch.softmax(out, dim=1)
            cost = cost + alt*l2_reg*(((P_ref - R_out)**2.0).sum(1)).mean(0)
        # autograd.grad returns only d(cost)/d(img) and leaves the model's
        # parameter .grad buffers untouched.
        (grad,) = torch.autograd.grad(cost, img)
        adv = img.detach() + eps * torch.sign(grad)
        img = torch.clamp(adv, bounds[0], bounds[1])
    return img

def execfile(filepath):
    """Execute the Python source file at ``filepath`` in this module's globals.

    Names defined by the file become module-level names here. The code is
    executed directly in ``globals()`` instead of in the function's locals
    followed by a copy, so the helper's own local names are not leaked and
    the result does not depend on ``locals()`` returning a live dict.

    Args:
        filepath: path of the source file to run.

    Raises:
        OSError: if the file cannot be opened.
        SyntaxError: if the file does not compile.
    """
    with open(filepath, 'rb') as file:
        source = file.read()
    # NOTE(security): exec runs arbitrary code — only call this on trusted files.
    exec(compile(source, filepath, 'exec'), globals())

#######################################Cudnn##############################################
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark=True
print('Cudnn status:',torch.backends.cudnn.enabled)
#######################################Set tensor to CUDA#########################################
# Makes every tensor created without an explicit device a CUDA float tensor.
# NOTE(review): newer PyTorch releases deprecate this call — confirm against
# the installed version.
torch.set_default_tensor_type('torch.cuda.FloatTensor')
#######################################Parameters##################################################
VAL_BATCH_SIZE   = 128
TEST_BATCH_SIZE   = 128
BASE_LR          = 1e-1
MAX_ITER         = (MAX_EPOCHS*50000)/TRAIN_BATCH_SIZE
MODEL_PREFIX     = 'models/' + EXP_NAME + '_'

####################################### load network ################################################
# ResNet18 is already defined earlier in this file; an external ResNet.py is
# only executed (and overrides those definitions) when it actually exists,
# instead of failing with FileNotFoundError.
if os.path.isfile('ResNet.py'):
    execfile('ResNet.py')
model = ResNet18()
model.cuda()
model.train()

###################################### load data ####################################################
# Training images get random crop + horizontal flip; val/test images are
# only converted to tensors.
transform_train = transforms.Compose([
        transforms.RandomCrop(size=32,padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),])

transform_test = transforms.Compose([
        transforms.ToTensor(),])

train_set  = torchvision.datasets.CIFAR10(root='./data', train=True , download=True, transform=transform_train)
val_set    = torchvision.datasets.CIFAR10(root='./data', train=True , download=True, transform=transform_test)
test_set   = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Creating Train-Val split from original Training set
train_size = 49000
valid_size = 1000
test_size  = 10000

# Hold out the first 100 images of each class (in dataset order) for
# validation. Labels are read from `targets` directly, so no image is
# decoded or augmented just to look at its label.
# NOTE(review): relies on the dataset exposing a `targets` list — confirm
# against the installed torchvision version.
val_indices = []
count = np.zeros(10)
for index, target in enumerate(train_set.targets):
    if(np.all(count==100)):
        break
    if(count[target]<100):
        count[target] += 1
        val_indices.append(index)

# Set membership keeps the filter linear instead of one list.remove per index.
held_out = set(val_indices)
train_indices = [index for index in range(50000) if index not in held_out]

print("Size of train set:",len(train_indices))
print("Size of val set:",len(val_indices))

#get data loader for train, val and test
train_loader = torch.utils.data.DataLoader(train_set,batch_size=TRAIN_BATCH_SIZE ,sampler=SubsetRandomSampler(train_indices))
val_loader   = torch.utils.data.DataLoader(val_set,sampler = SubsetRandomSampler(val_indices),batch_size=VAL_BATCH_SIZE)
test_loader   = torch.utils.data.DataLoader(test_set,batch_size=TEST_BATCH_SIZE)
print('CIFAR10 dataloader: Done')

###################################################################################################
epochs    = MAX_EPOCHS
iteration = 0
loss      = nn.CrossEntropyLoss()
LR = BASE_LR
optimizer = torch.optim.SGD(model.parameters(), lr=LR,momentum=0.9,weight_decay=5e-4)
optimizer.zero_grad()

##################################################################################################
for epoch in range(epochs):
    start = time.time()
    iter_loss =0
    counter =0
    # Per-epoch running sums reported to the summary writer below.
    total_ce_loss = torch.tensor([0.0])
    total_reg_loss = torch.tensor([0.0])

    for data, target in train_loader:
        data   = Variable(data).cuda()
        target = Variable(target).cuda()

        out  = model(data)
        # The clean prediction is only a fixed reference for the attack, so it
        # is detached: the attack then does not back-propagate through this
        # forward pass (those gradients were discarded by zero_grad() anyway).
        P_out = nn.Softmax(dim=1)(out).detach()

        # Start from the clean image plus random sign noise of magnitude B_val/255.
        adv_data = data + ((B_val/255.0)*torch.sign(torch.tensor([0.5]) - torch.rand_like(data)).cuda())
        adv_data = torch.clamp(adv_data,0.0,1.0)

        model.eval()
        # alt=(counter%2): the squared-difference term of the attack is only
        # active on odd iterations of the epoch.
        adv_data = Guided_Attack(model,loss,adv_data,target,eps=Feps/255.0,steps=1,P_out=P_out,l2_reg=l2_reg,alt=(counter%2))

        # Project the total perturbation back to +-8/255 and to the [0, 1] range.
        delta = adv_data - data
        delta = torch.clamp(delta,-8.0/255.0,8.0/255)
        adv_data = data+delta
        adv_data = torch.clamp(adv_data,0.0,1.0)

        model.train()
        optimizer.zero_grad()
        adv_out  = model(adv_data)
        out  = model(data)

        Q_out = nn.Softmax(dim=1)(adv_out)
        P_out = nn.Softmax(dim=1)(out)

        '''LOSS COMPUTATION'''
        closs = loss(out,target)
        reg_loss = ((P_out - Q_out)**2.0).sum(1).mean(0)
        cost = l_ce*closs + l2_reg*reg_loss
        cost.backward()
        optimizer.step()

        total_ce_loss += closs.data
        total_reg_loss += l2_reg*reg_loss.data

        if iteration%100==0:
            msg = 'iter,'+str(iteration)+',clean loss,'+str(closs.data.cpu().numpy()) \
            +',reg loss,'+str(reg_loss.data.cpu().numpy()) \
            +',total loss,'+str(cost.data.cpu().numpy()) \
            +'\n'
            # Context manager closes the log file even if the write fails.
            with open(LOG_FILE_NAME,'a+') as log_file:
                log_file.write(msg)
            model.train()

        iteration = iteration + 1
        counter = counter + 1
        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d] : Loss:%f \t\t'
                %(epoch, MAX_EPOCHS, counter,
                    (train_size/TRAIN_BATCH_SIZE),cost.data.cpu().numpy()))
    end = time.time()
    print('Epoch:',epoch,' Time taken:',(end-start))

    # One checkpoint per epoch.
    model_name = MODEL_PREFIX+str(epoch)+'.pkl'
    torch.save(model.state_dict(),model_name)

    writer.add_scalar("Loss/Train_CE_loss", total_ce_loss, epoch)
    writer.add_scalar("Loss/Train_Reg_loss", total_reg_loss, epoch)

    # Step schedule: divide the learning rate by lr_factor after epochs 70 and
    # 85, and scale the regulariser weight by mul after epoch 85.
    if epoch in [70,85]:
        LR /= lr_factor
        for param_group in optimizer.param_groups:
            param_group['lr'] = LR
    if epoch == 85:
        l2_reg = l2_reg*mul

#######################################################################################################################
model.eval()

def FGSM_Attack_step(model,loss,image,target,eps=8/255,bounds=(0,1),steps=30):
    """Iterated signed-gradient attack with a per-step size of ``eps / steps``.

    Args:
        model: callable network; inputs are moved to the device of its
            first parameter (falls back to ``image``'s device).
        loss: callable ``loss(output, target)`` returning a scalar.
        image: starting images.
        target: labels passed to ``loss``.
        eps: total step budget, split evenly across ``steps``.
        bounds: ``(low, high)`` values the result is clamped to after
            every step.
        steps: number of ascent steps.

    Returns:
        Tensor of perturbed images, clamped to ``bounds``, without gradient.
    """
    first_param = next(iter(model.parameters()), None)
    device = image.device if first_param is None else first_param.device

    tar = target.to(device)
    img = image.detach().to(device)
    eps = eps/steps
    for _ in range(steps):
        img = img.detach().requires_grad_(True)
        cost = loss(model(img), tar)
        # autograd.grad returns only d(cost)/d(img) and leaves the model's
        # parameter .grad buffers untouched.
        (grad,) = torch.autograd.grad(cost, img)
        adv = img.detach() + eps * torch.sign(grad)
        img = torch.clamp(adv,bounds[0],bounds[1])
    return img

def MSPGD(model,loss,data,target,eps=8/255,eps_iter=2/255,bounds=(),steps=(7,20,50,100,500)):
    """Run a multi-checkpoint PGD attack and return the images at each checkpoint.

    The perturbation starts as uniform noise in ``[-eps, eps]``. Every step
    moves along the sign of the loss gradient by ``eps_iter`` scaled with
    the per-channel range ``high - low``, clamps the image to the
    per-channel bounds and clamps the perturbation back to ``[-eps, eps]``.

    Args:
        model: network under attack; must be in eval mode. Inputs are moved
            to the device of its first parameter (falls back to ``data``'s
            device).
        loss: loss used for training, called as ``loss(output, target)``.
        data: input batch of shape (B, C, H, W).
        target: ground truth labels corresponding to ``data``.
        eps: maximum perturbation strength added to the image.
        eps_iter: step size, as a fraction of each channel's range.
        bounds: array-like of shape (C, 2) holding ``[low, high]`` per
            channel. When empty, ``[0, 1]`` is used for every channel.
        steps: step counts at which the current adversarial image is
            recorded; the attack runs for ``steps[-1]`` steps.

    Returns:
        List of adversarial image tensors, one per entry of ``steps`` that
        is reached, in the order they are reached.

    Raises:
        RuntimeError: if ``model`` is in training mode.
    """
    if model.training:
        # The original `assert '<text>'` was always true and never fired.
        raise RuntimeError('Model is in  training mode')

    first_param = next(iter(model.parameters()), None)
    device = data.device if first_param is None else first_param.device

    tar = target.to(device)
    data = data.detach().to(device)
    B,C,H,W = data.size()

    # Per-channel bounds, shaped (1, C, 1, 1) so they broadcast over the batch.
    bounds_t = torch.as_tensor(bounds, dtype=data.dtype, device=device)
    if bounds_t.numel() == 0:
        bounds_t = torch.tensor([[0.0, 1.0]] * C, dtype=data.dtype, device=device)
    bounds_t = bounds_t[:C]
    low = bounds_t[:, 0].view(1, -1, 1, 1)
    high = bounds_t[:, 1].view(1, -1, 1, 1)
    step_size = eps_iter * (high - low)

    noise = torch.as_tensor(np.random.uniform(-eps,eps,(B,C,H,W)), dtype=torch.float32, device=device)
    noise = torch.clamp(noise,-eps,eps)

    checkpoints = set(steps)
    img_arr = []
    for step in range(steps[-1]):
        img = (data + noise).requires_grad_(True)
        cost = loss(model(img), tar)
        # autograd.grad returns only d(cost)/d(img) and leaves the model's
        # parameter .grad buffers untouched.
        (grad,) = torch.autograd.grad(cost, img)
        adv = img.detach() + step_size * torch.sign(grad)
        # Clamp every channel to its own [low, high] range.
        adv = torch.max(torch.min(adv, high), low)
        noise = torch.clamp(adv - data,-eps,eps)
        if step + 1 in checkpoints:
            img_arr.append(data + noise)
    return img_arr

def max_margin_loss(x,y):
    """Mean margin between the best wrong-class score and the true-class score.

    For every row the true-class entry is pushed down by 1000 to find the
    highest-scoring other class; the loss is ``x[other] - x[true]``
    averaged over the batch. The class count and device are taken from
    ``x`` rather than being fixed to 10 classes on CUDA.

    Args:
        x: score tensor indexed as ``x[row, class]``.
        y: integer class index for every row.

    Returns:
        Scalar tensor holding the mean margin.
    """
    y = y.to(x.device)
    rows = torch.arange(y.size(0), device=x.device)
    corr = x[rows, y]
    # Only used to pick the runner-up class, so no gradient is needed here.
    masked = x.detach().clone()
    masked[rows, y] -= 1000
    runner_up = masked.argmax(dim=1)
    tar = x[rows, runner_up]
    return torch.mean(tar - corr)

def GAMA_PGD(model, data, target, eps, eps_iter, bounds, steps, w_reg, lin, SCHED, drop):
    """Run a PGD attack on a max-margin loss with a decaying guidance term.

    The perturbation starts at ``+-eps`` with random signs. For steps
    ``0..lin`` the loss is ``W_REG * mean squared difference`` between the
    softmax output at the initial perturbed image and at the current image,
    plus ``max_margin_loss``; ``W_REG`` starts at ``w_reg`` and drops by
    ``w_reg / lin`` per step. Afterwards only the max-margin loss is used.

    Args:
        model: network under attack; must be in eval mode. Inputs are moved
            to the device of its first parameter (falls back to ``data``'s
            device).
        data: input batch of shape (B, C, H, W).
        target: ground truth labels corresponding to ``data``.
        eps: maximum perturbation strength added to the image.
        eps_iter: initial step size, as a fraction of each channel's range.
        bounds: array-like of shape (C, 2) holding ``[low, high]`` per channel.
        steps: step counts at which the current adversarial image is
            recorded; the attack runs for ``steps[-1]`` steps.
        w_reg: initial weight of the guidance term.
        lin: number of steps over which the guidance weight decays; when
            ``lin <= 0`` the guidance term is not used.
        SCHED: collection of step indices at which ``eps_iter`` is divided
            by ``drop``.
        drop: divisor applied to ``eps_iter`` at the scheduled steps.

    Returns:
        List of adversarial image tensors, one per entry of ``steps`` that
        is reached, in the order they are reached.

    Raises:
        RuntimeError: if ``model`` is in training mode.
    """
    #Raise error if in training mode
    if model.training:
        # The original `assert '<text>'` was always true and never fired.
        raise RuntimeError('Model is in  training mode')

    first_param = next(iter(model.parameters()), None)
    device = data.device if first_param is None else first_param.device

    tar = target.to(device)
    data = data.detach().to(device)
    B,C,H,W = data.size()

    # Per-channel bounds, shaped (1, C, 1, 1) so they broadcast over the batch.
    bounds_t = torch.as_tensor(bounds, dtype=data.dtype, device=device)[:C]
    low = bounds_t[:, 0].view(1, -1, 1, 1)
    high = bounds_t[:, 1].view(1, -1, 1, 1)
    channel_range = high - low

    noise = torch.as_tensor(np.random.uniform(-eps,eps,(B,C,H,W)), dtype=torch.float32, device=device)
    noise = eps*torch.sign(noise)
    img_arr = []
    W_REG = w_reg

    # The reference prediction at the initial perturbed image never changes
    # (the model is in eval mode), so it is computed once, without a graph,
    # instead of on every step.
    with torch.no_grad():
        P_out = torch.softmax(model(data + noise), dim=1)

    checkpoints = set(steps)
    for step in range(steps[-1]):
        img = (data + noise).requires_grad_(True)

        if step in SCHED:
            eps_iter /= drop

        Q_out = torch.softmax(model(img), dim=1)
        cost = max_margin_loss(Q_out,tar)
        # lin > 0 guards the division below; the original divided by zero
        # when lin was 0.
        if lin > 0 and step <= lin:
            cost = W_REG*((P_out - Q_out)**2.0).sum(1).mean(0) + cost
            W_REG -= w_reg/lin

        # autograd.grad returns only d(cost)/d(img) and leaves the model's
        # parameter .grad buffers untouched.
        (grad,) = torch.autograd.grad(cost, img)
        # Convert the 0-1 step size to each channel's range, then ascend.
        adv = img.detach() + (eps_iter * channel_range) * torch.sign(grad)
        # Clamp every channel to its own [low, high] range.
        adv = torch.max(torch.min(adv, high), low)
        noise = torch.clamp(adv - data,-eps,eps)

        if step + 1 in checkpoints:
            img_arr.append(data + noise)
    return img_arr