<a href="https://colab.research.google.com/github/koconno8/CS436FinalProject/blob/main/mnistUpdate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#minstNetwork.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
from torchvision import datasets, transforms
import numpy as np
import time
import random
# conv1-conv11-pool-conv2-conv21

class MNIST_Network(nn.Module):
    """Convolutional classifier producing 10 logits per input image.

    Layout: conv1 -> conv11 -> pool1 -> conv2 -> conv21 -> pool2 -> fc1 -> fc2.
    All convolutions are 5x5 with padding 2 (spatial size preserved); each
    pool halves the spatial size.  ``fc1`` expects 64*7*7 features, i.e. a
    1-channel 28x28 input.
    """

    def __init__(self):
        super().__init__()
        # Settings shared by every convolution in the network.
        conv_kwargs = dict(kernel_size=5, dilation=1, stride=1, padding=2, bias=True)

        # First stage: 1 -> 32 -> 32 channels, then 2x2 max-pooling.
        self.conv1 = nn.Conv2d(1, 32, **conv_kwargs)
        self.conv11 = nn.Conv2d(32, 32, **conv_kwargs)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second stage: 32 -> 64 -> 64 channels, then 2x2 max-pooling.
        self.conv2 = nn.Conv2d(32, 64, **conv_kwargs)
        self.conv21 = nn.Conv2d(64, 64, **conv_kwargs)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head.
        self.fc1 = nn.Linear(64 * 7 * 7, 512, bias=True)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, input):
        """Return the un-normalised class scores (logits) for ``input``."""
        stage1 = self.pool1(F.relu(self.conv11(F.relu(self.conv1(input)))))
        stage2 = self.pool2(F.relu(self.conv21(F.relu(self.conv2(stage1)))))

        # Flatten everything except the batch dimension.
        batch_size, _channels, _height, _width = stage2.size()
        flat = stage2.view(batch_size, -1)

        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


In [None]:
#gatminst.py
#torch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
from torchvision import datasets, transforms
import numpy as np
import time
import random

##############################parse inputs###################
import getopt
import sys

import os
# Make sure every folder the script writes to (or downloads into) exists.
for _folder in ('./results', './log', './models', './data'):
    if not os.path.isdir(_folder):
        os.mkdir(_folder)


# ---------------------------------------------------------------------------
# Experiment configuration.  Each value is defined exactly once (the original
# listed the same defaults twice, which invites the two copies drifting apart).
# ---------------------------------------------------------------------------
EXP_NAME = 'MNIST_MLeNet_GAT'   # tag embedded in log / result / checkpoint file names
MAX_EPOCHS = 50                 # number of training epochs (one checkpoint is saved per epoch)
l_ce = 1.0                      # weight of the clean cross-entropy term in the training loss
l2_reg = 15.0                   # weight of the squared-L2 term between clean and adversarial softmax outputs
mul = 3.0                       # l2_reg is multiplied by this at every learning-rate drop
TRAIN_BATCH_SIZE = 128 #CHANGED
Feps = 0.3                      # eps handed to the training-time attack
B_val = 0.3                     # magnitude of the random +/- noise added before the attack
lr_factor = 5.0                 # the learning rate is divided by this at every drop

LOG_FILE_NAME = 'log/' + EXP_NAME + '_training_log.txt'

###################################### Function Definitions #######################################

def Guided_Attack(model,loss,image,target,eps=0.3,bounds=(0,1),steps=1,P_out=None,l2_reg=15):
    """Craft adversarial images with a sign-gradient attack on a guided loss.

    The maximised objective is ``loss(model(x), target)`` plus, when ``P_out``
    is given, ``l2_reg`` times the mean (over the batch) squared L2 distance
    between ``P_out`` and ``softmax(model(x))``.

    Args:
        model: callable mapping an image batch to logits.
        loss: callable ``loss(logits, target)`` returning a scalar.
        image: starting image batch.
        target: ground-truth labels for ``image``.
        eps: total step budget; every step moves each pixel by ``eps/steps``.
        bounds: ``(low, high)`` pixel range the result is clamped to.
        steps: number of sign-gradient steps (must be >= 1).
        P_out: reference softmax output.  It is treated as a constant
            (detached).  ``None`` (or an empty list) disables the
            regulariser, leaving a plain sign-gradient attack on ``loss``.
        l2_reg: weight of the regulariser.

    Returns:
        The perturbed image batch (no gradient attached), on the same device
        as the model parameters.
    """
    # Run on the model's device instead of assuming CUDA is present.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = image.device
    tar = target.to(device)
    img = image.to(device)

    use_reg = P_out is not None and not (isinstance(P_out, (list, tuple)) and len(P_out) == 0)
    if use_reg:
        # Detach: the reference does not depend on the attacked image, and
        # keeping the caller's graph would break on a second step because that
        # graph is freed after the first backward pass.
        reference = P_out.detach().to(device)

    step_eps = eps/steps
    for _ in range(steps):
        img = img.detach().requires_grad_(True)
        out = model(img)
        cost = loss(out,tar)
        if use_reg:
            R_out = torch.softmax(out, dim=1)
            cost = cost + l2_reg*(((reference - R_out)**2.0).sum(1)).mean(0)
        # Gradient w.r.t. the image only; model parameter grads stay untouched.
        grad, = torch.autograd.grad(cost, img)
        img = torch.clamp(img.detach() + step_eps * torch.sign(grad), bounds[0], bounds[1])
    return img

def execfile(filepath):
    """Execute the Python source file ``filepath`` in this module's globals.

    Names defined by the executed file become module-level names here.
    The code runs directly in ``globals()`` rather than copying ``locals()``
    afterwards, so this helper's own variables are not leaked into the module
    namespace and the result does not depend on how ``exec`` interacts with
    function locals.

    Args:
        filepath: path of the Python source file to run.
    """
    # NOTE(security): exec runs arbitrary code -- only use on trusted files.
    with open(filepath, 'rb') as source_file:
        source = source_file.read()
    exec(compile(source, filepath, 'exec'), globals())



#######################################Cudnn##############################################
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark=True
print('Cudnn status:',torch.backends.cudnn.enabled)
#######################################Set tensor to CUDA#########################################
# NOTE(review): from here on, tensors created without an explicit device are
# CUDA float tensors, so the script needs a GPU even though `device` below
# can fall back to CPU.
torch.set_default_tensor_type('torch.cuda.FloatTensor')
#######################################Parameters##################################################
VAL_BATCH_SIZE   = 1024
TEST_BATCH_SIZE  = 1024
BASE_LR          = 5e-3 #CHANGED FROM 1e-2
# Floor division keeps the iteration count an integer under Python 3.
MAX_ITER         = (MAX_EPOCHS*50000)//TRAIN_BATCH_SIZE
MODEL_PREFIX     = 'models/mnist_'+EXP_NAME+'_epoch_'
####################################### load network ################################################
#execfile('MNIST_Network.py')
model = MNIST_Network()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Automatically detect GPU or fallback to CPU
model.to(device)

model.train()
###################################### load data ###################################################
transform = transforms.Compose([
        transforms.ToTensor(),])

# train_set and val_set both wrap the MNIST training split; they are separated
# below through disjoint index samplers.
train_set  = torchvision.datasets.MNIST(root='./data', train=True , download=True, transform=transform)
val_set    = torchvision.datasets.MNIST(root='./data', train=True , download=True, transform=transform)
test_set   = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Split training into train and validation
train_size = 50000
valid_size = 10000
test_size  = 10000

# Fixed seed so the train/validation split is the same on every run.
np.random.seed(0)
indices    = np.arange(train_size+valid_size)
np.random.shuffle(indices)
train_indices = indices[0:train_size]
val_indices   = indices[train_size:]

# Data loaders for train, validation and test.
train_loader = torch.utils.data.DataLoader(train_set,batch_size=TRAIN_BATCH_SIZE ,sampler=SubsetRandomSampler(train_indices))
val_loader   = torch.utils.data.DataLoader(val_set,sampler = SubsetRandomSampler(val_indices),batch_size=VAL_BATCH_SIZE)
test_loader  = torch.utils.data.DataLoader(test_set,batch_size=TEST_BATCH_SIZE)
print('MNIST dataloader: Done')
###################################################################################################
epochs    = MAX_EPOCHS
iteration = 0
loss      = nn.CrossEntropyLoss()
# `reduction='none'` is the supported spelling of the deprecated `reduce=False`.
loss_no_reduce = nn.CrossEntropyLoss(reduction='none')
LR   = BASE_LR
optimizer = torch.optim.SGD(model.parameters(), lr=LR,momentum=0.9,weight_decay=5e-4)
optimizer.zero_grad()
##################################################################################################
# Epochs after which the learning rate is divided by lr_factor and l2_reg is
# multiplied by mul.  Floor division is essential: with true division
# (50/4 == 12.5) the first and third milestone can never equal an integer
# epoch, so the schedule would fire only once.
lr_milestones = (epochs // 4, 2 * epochs // 4, 3 * epochs // 4)
# Real number of batches per epoch (includes the final partial batch).
iters_per_epoch = len(train_loader)

for epoch in range(epochs):
    start = time.time()
    counter = 0

    for data, target in train_loader:
        data   = data.to(device)
        target = target.to(device)

        # Clean forward pass; its softmax is the reference for the attack.
        out  = model(data)
        P_out = nn.Softmax(dim=1)(out)

        # Random start: shift every pixel by +B_val or -B_val, then clip to [0, 1].
        adv_data = data + B_val * torch.sign(0.5 - torch.rand_like(data))
        adv_data = torch.clamp(adv_data,0.0,1.0)

        model.eval()

        adv_data = Guided_Attack(model,loss,adv_data,target,eps=Feps,steps=1,P_out=P_out,l2_reg=l2_reg)

        # Keep the total perturbation within +/-0.3 of the clean image and
        # the result inside the valid pixel range.
        delta = torch.clamp(adv_data - data,-0.3,0.3)
        adv_data = torch.clamp(data + delta,0.0,1.0)

        model.train()
        optimizer.zero_grad()
        adv_out  = model(adv_data)
        out  = model(data)

        Q_out = nn.Softmax(dim=1)(adv_out)
        P_out = nn.Softmax(dim=1)(out)

        # Loss: cross-entropy on clean data + squared L2 distance between the
        # clean and adversarial softmax outputs.
        closs = loss(out,target)
        reg_loss =  ((P_out - Q_out)**2.0).sum(1).mean(0)

        cost = l_ce*closs + l2_reg*reg_loss
        cost.backward()
        optimizer.step()

        if iteration%100==0:
            msg = 'iter,'+str(iteration)+',clean loss,'+str(closs.data.cpu().numpy()) \
            +',reg loss,'+str(reg_loss.data.cpu().numpy()) \
            +',total loss,'+str(cost.data.cpu().numpy()) \
                                        +'\n'
            with open(LOG_FILE_NAME,'a+') as log_file:
                log_file.write(msg)
        iteration = iteration + 1
        ##console log
        counter = counter + 1
        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d] : Loss:%f \t\t'
                %(epoch, MAX_EPOCHS, counter,
                    iters_per_epoch,cost.item()))
        sys.stdout.flush()
    end = time.time()
    print('Epoch:',epoch,' Time taken:',(end-start))

    # One checkpoint per epoch; the evaluation code picks the best one later.
    model_name = MODEL_PREFIX+str(epoch)+'.pkl'
    torch.save(model.state_dict(),model_name)

    if epoch in lr_milestones:
        LR /= lr_factor
        for param_group in optimizer.param_groups:
            param_group['lr'] = LR
        l2_reg = l2_reg*mul

#######################################################################################################################
model.eval()


def FGSM_Attack_step(model,loss,image,target,eps=0.1,bounds=(0,1),GPU=0,steps=30):
    """Iterative sign-gradient (FGSM / I-FGSM) attack.

    Performs ``steps`` gradient-sign ascent steps on ``loss(model(x), target)``,
    each of size ``eps/steps``, clamping to ``bounds`` after every step.
    ``steps=1`` is single-step FGSM.

    Args:
        model: callable mapping an image batch to logits.
        loss: callable ``loss(logits, target)`` returning a scalar.
        image: clean image batch.
        target: ground-truth labels for ``image``.
        eps: total perturbation budget, split evenly across the steps.
        bounds: ``(low, high)`` pixel range.
        GPU: unused; kept so existing callers keep working.
        steps: number of steps (must be >= 1).

    Returns:
        The adversarial image batch (no gradient attached), on the same
        device as the model parameters.
    """
    # Run on the model's device instead of assuming CUDA is present.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = image.device
    tar = target.to(device)
    img = image.to(device)

    step_eps = eps/steps
    for _ in range(steps):
        img = img.detach().requires_grad_(True)
        cost = loss(model(img),tar)
        # Gradient w.r.t. the image only; model parameter grads stay untouched.
        grad, = torch.autograd.grad(cost, img)
        img = torch.clamp(img.detach() + step_eps * torch.sign(grad), bounds[0], bounds[1])
    return img


##########################FIND BEST MODEL###############################################
# Evaluate every saved checkpoint on the validation split (clean accuracy and
# accuracy under a 40-step iterative FGSM attack with eps=0.3) and reload the
# checkpoint with the highest accuracy under attack.
EVAL_LOG_NAME = 'results/'+EXP_NAME+'.txt'
ACC_EPOCH_LOG_NAME = 'results/acc_'+EXP_NAME+'_epoch.txt'
ACC_IFGSM_EPOCH_LOG_NAME = 'results/ifgsm_acc_'+EXP_NAME+'_epoch.txt'
with open(EVAL_LOG_NAME,'a') as log_file:
    log_file.write('##################### iter.FGSM: steps=40,eps=0.3 ####################\n')
accuracy_log = np.zeros(MAX_EPOCHS)
for epoch in range(MAX_EPOCHS):
    model_name = MODEL_PREFIX+str(epoch)+'.pkl'
    # Checkpoints are plain state dicts, so restricted unpickling is sufficient.
    model.load_state_dict(torch.load(model_name, weights_only=True))
    eps=0.3
    accuracy = 0
    accuracy_ifgsm = 0
    num_seen = 0  # number of validation samples actually evaluated
    for data, target in val_loader:
        data   = data.to(device)
        target = target.to(device)

        # Clean accuracy.
        with torch.no_grad():
            prediction = model(data).max(1)[1]
        accuracy += prediction.eq(target).sum().item()

        # Accuracy under attack, on the same batch.
        adv = FGSM_Attack_step(model,loss,data,target,eps=eps,steps=40)
        with torch.no_grad():
            prediction = model(adv).max(1)[1]
        accuracy_ifgsm += prediction.eq(target).sum().item()

        num_seen += target.size(0)

    # Divide by the samples seen, not batches * batch size: the last batch is
    # partial, so the latter under-reports accuracy.
    acc = (accuracy*1.0) / num_seen * 100
    acc_ifgsm = (accuracy_ifgsm*1.0) / num_seen * 100

    #log accuracy to file
    with open(ACC_EPOCH_LOG_NAME,'a') as log_file:
        log_file.write(str(epoch)+','+str(acc)+'\n')

    with open(ACC_IFGSM_EPOCH_LOG_NAME,'a') as log_file:
        log_file.write(str(epoch)+','+str(acc_ifgsm)+'\n')

    accuracy_log[epoch] = acc_ifgsm
    sys.stdout.write('\r')
    sys.stdout.write('| Epoch [%3d/%3d] : Acc:%f \t\t'
            %(epoch, MAX_EPOCHS,acc))
    sys.stdout.flush()

with open(EVAL_LOG_NAME,'a') as log_file:
    log_file.write('Epoch,'+str(accuracy_log.argmax())+',Acc,'+str(accuracy_log.max())+'\n')

# Reload the most robust checkpoint for the final test-set evaluation.
model_name = MODEL_PREFIX+str(accuracy_log.argmax())+'.pkl'
model.load_state_dict(torch.load(model_name, weights_only=True))
model.eval()
model.to(device)
##################################### FGSM #############################################
# Test-set accuracy under single-step FGSM for each eps in np.arange(0, 0.301, 0.05).
EVAL_LOG_NAME = 'results/'+EXP_NAME+'.txt'
with open(EVAL_LOG_NAME,'a') as log_file:
    log_file.write('##################### FGSM ####################\n')
for eps in np.arange(0,0.301,0.05):
    i = 0
    accuracy = 0
    for i, (data, target) in enumerate(test_loader, start=1):
        adv = FGSM_Attack_step(model,loss,data,target,eps=eps,steps=1)
        data = adv.cuda()
        target = target.cuda()
        out = model(data)
        _, prediction = out.data.max(1)
        accuracy = accuracy + prediction.eq(target.data).sum()
    acc = accuracy.item() * 1.0 / test_size * 100
    msg = 'eps,%s,Acc,%s\n' % (eps, acc)
    with open(EVAL_LOG_NAME,'a') as log_file:
        log_file.write(msg)
##################################### iFGSM #############################################
# Test-set accuracy under 40-step iterative FGSM for each eps in np.arange(0.05, 0.301, 0.05).
with open(EVAL_LOG_NAME,'a') as log_file:
    log_file.write('##################### iFGSM: step=40 ####################\n')
for eps in np.arange(0.05,0.301,0.05):
    i = 0
    accuracy = 0
    for i, (data, target) in enumerate(test_loader, start=1):
        adv = FGSM_Attack_step(model,loss,data,target,eps=eps,steps=40)
        data = adv.cuda()
        target = target.cuda()
        out = model(data)
        _, prediction = out.data.max(1)
        accuracy = accuracy + prediction.eq(target.data).sum()
    acc = accuracy.item() * 1.0 / test_size * 100
    msg = 'eps,%s,Acc,%s\n' % (eps, acc)
    with open(EVAL_LOG_NAME,'a') as log_file:
        log_file.write(msg)
##################################### MSPGD #############################################

def MSPGD(model,loss,data,target,eps=0.3,eps_iter=0.01,bounds=(),steps=(7,20,50,100,500)):
    """Multi-checkpoint PGD attack with a uniform random start.

    Runs ``steps[-1]`` sign-gradient ascent steps on ``loss(model(x), target)``
    and returns a snapshot of the adversarial batch after each step count
    listed in ``steps``.

    Args:
        model: network in evaluation mode, mapping images to logits.
        loss: loss used for training, called as ``loss(logits, target)``.
        data: clean input batch of shape (B, C, H, W).
        target: ground-truth labels for ``data``.
        eps: perturbation strength; the perturbation is kept in
            ``[-eps, eps]`` per pixel.
        eps_iter: step size as a fraction of each channel's value range.
        bounds: array-like of shape (>=C, 2) with one ``[low, high]`` row per
            channel; only the first C rows are used.
        steps: ascending step counts at which to record a snapshot; the
            attack runs for ``steps[-1]`` iterations.

    Returns:
        List of adversarial batches, one per distinct entry of ``steps``, in
        ascending step order.

    Raises:
        RuntimeError: if ``model`` is in training mode.
        ValueError: if ``bounds`` does not provide a ``[low, high]`` row for
            every channel of ``data``.
    """
    # A bare `assert 'text'` can never fail; raise explicitly instead.
    if model.training:
        raise RuntimeError('Model is in  training mode')

    # Run on the model's device instead of assuming CUDA is present.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = data.device
    tar = target.to(device)
    data = data.to(device)
    B,C,H,W = data.size()

    bounds_t = torch.as_tensor(np.asarray(bounds), dtype=data.dtype, device=device)
    if bounds_t.dim() != 2 or bounds_t.size(1) != 2 or bounds_t.size(0) < C:
        raise ValueError('bounds must have one [low, high] row per channel')
    # Per-channel limits, shaped to broadcast over (B, C, H, W).
    low = bounds_t[:C, 0].view(1, C, 1, 1)
    high = bounds_t[:C, 1].view(1, C, 1, 1)
    # Convert eps_iter from the 0-1 scale to each channel's range.
    step_size = eps_iter * (high - low)

    noise = torch.from_numpy(np.random.uniform(-eps,eps,(B,C,H,W))).to(device=device, dtype=torch.float32)
    noise = torch.clamp(noise,-eps,eps)

    checkpoints = set(steps)
    img_arr = []
    for step in range(steps[-1]):
        img = (data + noise).detach().requires_grad_(True)
        cost = loss(model(img),tar)
        # Gradient w.r.t. the image only; model parameter grads stay untouched.
        grad, = torch.autograd.grad(cost, img)
        # Ascent step, then clip to the valid per-channel value range.
        adv = img.detach() + step_size * torch.sign(grad)
        adv = torch.max(torch.min(adv, high), low)
        # Project the perturbation back into the eps-ball around the clean data.
        noise = torch.clamp(adv - data,-eps,eps)
        if step + 1 in checkpoints:
            img_arr.append(data + noise)
    return img_arr

##################################### PGD, steps=[20,40,100,500] #############################################
# Test-set accuracy under PGD (eps=0.3, eps_iter=0.01), recorded after each
# of the step counts in all_steps.
with open(EVAL_LOG_NAME,'a') as log_file:
    log_file.write('##################### PGD: steps=[20,40,100,500],eps_iter=0.01 ####################\n')
all_steps = [20,40,100,500]
num_steps = len(all_steps)
eps = 0.3
i = 0
acc_arr = torch.zeros((num_steps))  # correct-prediction counts per checkpoint
for data, target in test_loader:
    adv_arr = MSPGD(model,loss,data,target,eps=eps,eps_iter=0.01,bounds=np.array([[0,1],[0,1],[0,1]]),steps=all_steps)
    target = target.to(device)
    for j in range(num_steps):
        with torch.no_grad():
            out = model(adv_arr[j].to(device))
        prediction = out.max(1)[1]
        acc_arr[j] = acc_arr[j] + prediction.eq(target).sum()
    i = i + 1
print(acc_arr)
with open(EVAL_LOG_NAME,'a') as log_file:
    for j in range(num_steps):
        # Work with a plain float so the log holds a number, not a tensor repr
        # such as "tensor(96.93)".
        acc = (acc_arr[j].item()*1.0) / (test_size) * 100
        acc_arr[j] = acc
        log_file.write('eps,'+str(eps)+',steps,'+str(all_steps[j])+',Acc,'+str(acc)+'\n')



def max_margin_loss(x,y):
    """Mean margin between the best wrong-class score and the true-class score.

    For every row, the score of the true class ``y`` is subtracted from the
    highest score among the other classes; the result is averaged over the
    batch.  Positive values mean the sample is misclassified.

    Works for any number of classes and on whatever device ``x`` lives on.

    Args:
        x: score tensor of shape (B, num_classes).
        y: integer class labels of shape (B,).

    Returns:
        Scalar tensor holding the mean margin.
    """
    rows = torch.arange(y.size(0), device=x.device)
    y = y.to(x.device)
    corr = x[rows, y]

    # Push the true class far down so argmax picks the strongest other class.
    # Done on a detached copy: it is only used to choose indices.
    masked = x.detach().clone()
    masked[rows, y] -= 1000
    runner_up = masked.argmax(dim=1)

    return torch.mean(x[rows, runner_up] - corr)


def GAMA_PGD(model,data,target,eps,eps_iter,bounds,steps,w_reg,lin,SCHED,drop):
    """Multi-checkpoint PGD on a max-margin loss with a decaying guidance term.

    The attack starts from ``data + eps * sign(uniform noise)``.  For steps
    ``0..lin`` the maximised objective is the max-margin loss on the softmax
    output plus ``W_REG`` times the mean squared L2 distance between the
    softmax outputs of the starting point and of the current iterate;
    ``W_REG`` starts at ``w_reg`` and decreases by ``w_reg/lin`` per step.
    Afterwards only the max-margin loss is used.

    Args:
        model: network in evaluation mode, mapping images to logits.
        data: clean input batch of shape (B, C, H, W).
        target: ground-truth labels for ``data``.
        eps: perturbation strength; the perturbation is kept in
            ``[-eps, eps]`` per pixel.
        eps_iter: initial step size as a fraction of each channel's range.
        bounds: array-like of shape (>=C, 2) with one ``[low, high]`` row per
            channel; only the first C rows are used.
        steps: ascending step counts at which to record a snapshot; the
            attack runs for ``steps[-1]`` iterations.
        w_reg: initial weight of the guidance term.
        lin: number of steps over which the guidance weight decays; ``0``
            disables the guidance term.
        SCHED: step indices at which ``eps_iter`` is divided by ``drop``.
        drop: divisor applied to ``eps_iter`` at each scheduled step.

    Returns:
        List of adversarial batches, one per distinct entry of ``steps``, in
        ascending step order.

    Raises:
        RuntimeError: if ``model`` is in training mode.
        ValueError: if ``bounds`` does not provide a ``[low, high]`` row for
            every channel of ``data``.
    """
    # A bare `assert 'text'` can never fail; raise explicitly instead.
    if model.training:
        raise RuntimeError('Model is in  training mode')

    # Run on the model's device instead of assuming CUDA is present.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = data.device
    tar = target.to(device)
    data = data.to(device)
    B,C,H,W = data.size()

    bounds_t = torch.as_tensor(np.asarray(bounds), dtype=data.dtype, device=device)
    if bounds_t.dim() != 2 or bounds_t.size(1) != 2 or bounds_t.size(0) < C:
        raise ValueError('bounds must have one [low, high] row per channel')
    # Per-channel limits, shaped to broadcast over (B, C, H, W).
    low = bounds_t[:C, 0].view(1, C, 1, 1)
    high = bounds_t[:C, 1].view(1, C, 1, 1)
    channel_range = high - low

    noise = torch.from_numpy(np.random.uniform(-eps,eps,(B,C,H,W))).to(device=device, dtype=torch.float32)
    noise = eps*torch.sign(noise)

    # The reference output depends only on the starting point, so it is
    # computed once, without a graph, instead of on every iteration.
    with torch.no_grad():
        P_out = torch.softmax(model(data + noise), dim=1)

    W_REG = w_reg
    checkpoints = set(steps)
    img_arr = []
    for step in range(steps[-1]):
        img = (data + noise).detach().requires_grad_(True)

        if step in SCHED:
            eps_iter /= drop

        Q_out = torch.softmax(model(img), dim=1)
        # `lin > 0` guards the division below.
        if lin > 0 and step <= lin:
            cost = W_REG*((P_out - Q_out)**2.0).sum(1).mean(0) + max_margin_loss(Q_out,tar)
            W_REG -= w_reg/lin
        else:
            cost = max_margin_loss(Q_out,tar)

        # Gradient w.r.t. the image only; model parameter grads stay untouched.
        grad, = torch.autograd.grad(cost, img)
        # Ascent step scaled to each channel's range, then clip to that range.
        adv = img.detach() + (eps_iter * channel_range) * torch.sign(grad)
        adv = torch.max(torch.min(adv, high), low)
        # Project the perturbation back into the eps-ball around the clean data.
        noise = torch.clamp(adv - data,-eps,eps)

        if step + 1 in checkpoints:
            img_arr.append(data + noise)
    return img_arr



# GAMA-PGD settings.
SCHED = [50,75]   # steps at which the attack step size is divided by `drop`
drop = 10
lin = 50          # number of steps over which the guidance weight decays
w_reg = 5         # initial guidance weight
##################################### GAMA PGD, steps=[60,85,90,100] #############################################
# Test-set accuracy under GAMA-PGD (eps=0.3, initial eps_iter=0.3), recorded
# after each of the step counts in all_steps.
with open(EVAL_LOG_NAME,'a+') as log_file:
    log_file.write('##################### Gama-PGD Wreg5 lin50, drop by 10 at [50,75]: steps=[60,85,90,100], eps_iter_init=0.3  ####################\n')
all_steps = [60,85,90,100]
num_steps = len(all_steps)
eps = 0.3
i = 0
acc_arr = torch.zeros((num_steps))  # correct-prediction counts per checkpoint
for data, target in test_loader:
    adv_arr = GAMA_PGD(model,data,target,eps=eps,eps_iter=0.3,bounds=np.array([[0,1],[0,1],[0,1]]),steps=all_steps,w_reg=w_reg,lin=lin,SCHED=SCHED,drop=drop)
    target = target.to(device)
    for j in range(num_steps):
        with torch.no_grad():
            out = model(adv_arr[j].to(device))
        prediction = out.max(1)[1]
        acc_arr[j] = acc_arr[j] + prediction.eq(target).sum()
    i = i + 1
print(acc_arr)
with open(EVAL_LOG_NAME,'a+') as log_file:
    for j in range(num_steps):
        # Work with a plain float so the log holds a number, not a tensor repr
        # such as "tensor(90.73)".
        acc = (acc_arr[j].item()*1.0) / (test_size) * 100
        acc_arr[j] = acc
        log_file.write('eps,'+str(eps)+',steps,'+str(all_steps[j])+',Acc,'+str(acc)+'\n')

Cudnn status: True
MNIST dataloader: Done
| Epoch [  0/ 50] Iter[391/390] : Loss:1.463496 		Epoch: 0  Time taken: 10.456432342529297
| Epoch [  1/ 50] Iter[391/390] : Loss:0.974477 		Epoch: 1  Time taken: 10.182543754577637
| Epoch [  2/ 50] Iter[391/390] : Loss:0.758520 		Epoch: 2  Time taken: 10.31870436668396
| Epoch [  3/ 50] Iter[391/390] : Loss:0.886610 		Epoch: 3  Time taken: 10.194785118103027
| Epoch [  4/ 50] Iter[391/390] : Loss:0.592734 		Epoch: 4  Time taken: 10.431707382202148
| Epoch [  5/ 50] Iter[391/390] : Loss:0.336903 		Epoch: 5  Time taken: 10.326686382293701
| Epoch [  6/ 50] Iter[391/390] : Loss:0.295170 		Epoch: 6  Time taken: 10.270173788070679
| Epoch [  7/ 50] Iter[391/390] : Loss:0.279707 		Epoch: 7  Time taken: 10.286213874816895
| Epoch [  8/ 50] Iter[391/390] : Loss:0.186130 		Epoch: 8  Time taken: 10.193046808242798
| Epoch [  9/ 50] Iter[391/390] : Loss:0.177573 		Epoch: 9  Time taken: 10.355818033218384
| Epoch [ 10/ 50] Iter[391/390] : Loss:0.085248 	

  model.load_state_dict(torch.load(model_name))


| Epoch [ 49/ 50] : Acc:96.835938 		

  model.load_state_dict(torch.load(model_name))


tensor([9693., 9520., 9396., 9382.])
tensor([9073., 9025., 9024., 9024.])
