# ITS5200 - Applied Machine Learning Final Project by 

#### Rubabul Karim (karim12@pnw.edu), 
#### Rusaful Karim (karim11@pnw.edu), 
#### Wahaj (mwahajud@pnw.edu), 
#### Mohammed Al Hamad (hamad@pnw.edu), 

### To - Prof. Ricardo Calix, Ph.D (rcalix@pnw.edu)


This project is an elaborate demonstration of how the accuracy of datasets can be manipulated by attacking it with morphed data from the dataset and training the dataset with these 'adversarial images'. This report will include the following:

b) A ML baseline for image classification.

c) The Performance Metrics of both before and after the adversarial attack has been done.

d) A demonstration of adding adversarial images to the training process to try and manipulate the end result. 

e) A conclusive display of how many adversarial images (Target: 5%) are needed to lower performance significantly.

-------------------------------------------------------------------------------------------------------------------

### We start by downloading the core libraries for the project

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.autograd import Variable
from torch.utils.data import DataLoader

import torchvision.models as models
from torchvision import transforms
from torchvision import datasets
import time
import tqdm
import copy
import requests, io
from PIL import Image
from icecream import ic
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, r2_score, accuracy_score, mean_absolute_error, mean_squared_error, mean_squared_error, recall_score, precision_score


%matplotlib inline

### For this Project, we are using the STL10 Dataset

The STL-10 dataset is an image recognition dataset for developing unsupervised feature learning, deep learning, self-taught learning algorithms. It is inspired by the CIFAR-10 dataset but with some modifications.

In [2]:
train_dataset = datasets.STL10(root='./data', split = 'train', transform=transforms.ToTensor(), download=True)
test_dataset = datasets.STL10(root='./data', split = 'test', transform=transforms.ToTensor())

Files already downloaded and verified


### After loading the dataset, we set the parameters that will be used for Training the data

In [3]:
BATCH_SIZE = 8
NUM_OF_EPOCHS = 20
LEARNING_RATE = 0.01
DECAY = 0.00000 

### We can then load the data using DataLoader using the training parameters

In [4]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=1, shuffle=False)

### Using the ResNet 50 Model

In [5]:
model = models.resnet50(pretrained=True) 
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)
model.cuda()
model.eval()

AssertionError: Torch not compiled with CUDA enabled

### Setting the Loss Function

In [6]:
loss = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=DECAY)

### Training the Model before the Adversarial attack

In [7]:
def train(loader, net, criterion, optimizer):
    net.train()

    running_loss = 0.0
    running_samples = 0.0
    running_correct = 0.0

    for epoch in range(1, NUM_OF_EPOCHS+1):
        ic('Epoch: ', epoch)
        for data in loader:
            img, label = data

            running_samples += img.size(0)

            img = img.cuda()
            label = label.cuda()
            output_score = net(img) 
            loss = criterion(output_score, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


            _, pred = torch.max(output_score, 1)
            num_correct = (pred == label).sum().item()

            running_loss += loss.item()
            running_correct += num_correct

        average_loss = running_loss / running_samples
        average_accuracy = running_correct / running_samples

        ic('Training loss: {:.4f}, Acc: {:.4f} '.format(average_loss, average_accuracy,))
    

In [8]:
train(train_loader, model, criterion=loss, optimizer=opt) 

ic| 'Epoch: ', epoch: 1


AssertionError: Torch not compiled with CUDA enabled

### Performance Metrics for the Model

In [9]:
y_pred = []
y_true = []

for inputs, labels in test_loader:
        output = model(inputs.cuda()) 

        output = (torch.max(torch.exp(output), 1)[1]).data.cpu().numpy()
        y_pred.extend(output) 
        
        labels = labels.data.cpu().numpy()
        y_true.extend(labels) 

classes = ('airplane', 'bird', 'car', 'cat', 'deer',
        'dog', 'horse', 'monkey', 'ship', 'truck')

cf_matrix = confusion_matrix(y_true, y_pred)
df_cm = pd.DataFrame(cf_matrix/np.sum(cf_matrix) *10, index = [i for i in classes],
                     columns = [i for i in classes])

plt.figure(figsize = (12,7))
sns.heatmap(df_cm, annot=True)

avg = 'micro'
ic(f'ACCURACY OF THE MODEL: {accuracy_score(y_true, y_pred)}')
ic(f'PRECISION OF MODEL: {precision_score(y_true, y_pred, average=avg)}')
ic(f'RECALL OF MODEL: {recall_score(y_true, y_pred, average=avg)}')

AssertionError: Torch not compiled with CUDA enabled

In [10]:
def fgsm_attack(input,epsilon,data_grad):

  pert_out = input + epsilon*data_grad.sign()
  pert_out = torch.clamp(pert_out, 0, 1)
  return pert_out

def ifgsm_attack(input,epsilon,data_grad):

  iter = 10
  alpha = epsilon/iter
  pert_out = input
  for i in range(iter-1):
    pert_out = pert_out + alpha*data_grad.sign()
    pert_out = torch.clamp(pert_out, 0, 1)
    if torch.norm((pert_out-input),p=float('inf')) > epsilon:
      break
  return pert_out

def mifgsm_attack(input,epsilon,data_grad):

  iter=10
  decay_factor=1.0
  pert_out = input
  alpha = epsilon/iter
  g=0

  for i in range(iter-1):
      
    g = decay_factor*g + data_grad/torch.norm(data_grad,p=1)
    pert_out = pert_out + alpha*torch.sign(g)
    pert_out = torch.clamp(pert_out, 0, 1)

    if torch.norm((pert_out-input),p=float('inf')) > epsilon:
      break

  return pert_out

### Using Adversarial Images to evaluate Model

In [11]:
def test(model,device,test_loader,epsilon,attack):

  correct = 0
  adv_examples = []
  model.eval()

  for img, label in test_loader:
      img, label = img.to(device), label.to(device)
      img.requires_grad = True
      output = model(img)

      init_pred = output.max(1, keepdim=True)[1] 

      if init_pred.item != label.item:
          continue

      loss = loss(output, label)
      model.zero_grad()
      loss.backward()
      data_grad = img.grad.data

      if attack == "fgsm":
        perturbed_data = fgsm_attack(img,epsilon,data_grad)
      elif attack == "ifgsm":
        perturbed_data = ifgsm_attack(img,epsilon,data_grad)
      elif attack == "mifgsm":
        perturbed_data = mifgsm_attack(img,epsilon,data_grad)
        
      output_adv = model(perturbed_data)
      final_pred = output_adv.max(1, keepdim=True)[1]
      
      if final_pred.item == label.item:
          correct += 1
          if (epsilon == 0) and (len(adv_examples) < 5):
              adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
              adv_examples.append( (init_pred.item, final_pred.item, adv_ex) )
      else:
          if len(adv_examples) < 5:
              adv_ex = perturbed_data.squeeze().detach().cpu().numpy()
              adv_examples.append( (init_pred.item, final_pred.item, adv_ex) )

  final_acc = correct/float(len(test_loader))
  print("Epsilon: {}\tTest Accuracy = {} / {} = {}".format(epsilon, correct, len(test_loader), final_acc))

  return final_acc, adv_examples

### We are also able to plot the accuracy as a curve

In [12]:
epsilons = [0,0.007,0.01,0.02,0.03,0.05,0.1,0.2,0.3]

for attack in ("fgsm","ifgsm","mifgsm"):
  accuracies = []
  examples = []

  for eps in epsilons:
      acc, ex = test(model, 'cuda',train_loader,eps,attack)
      accuracies.append(acc)
      examples.append(ex)

  plt.figure(figsize=(5,5))
  plt.plot(epsilons, accuracies, "*-")
  plt.title(attack)
  plt.xlabel("Epsilon")
  plt.ylabel("Accuracy")
  plt.show()

  cnt = 0
  plt.figure(figsize=(8,10))

  for i in range(len(epsilons)):
      for j in range(len(examples[i])):

          cnt += 1
          plt.subplot(len(epsilons),len(examples[0]),cnt)
          plt.xticks([], [])
          plt.yticks([], [])

          if j == 0:
              plt.ylabel("Eps: {}".format(epsilons[i]), fontsize=14)
          orig,adv,ex = examples[i][j]
          plt.title("{} -> {}".format(orig, adv))
          plt.imshow(ex, cmap="gray")

  plt.tight_layout()
  plt.show()

AssertionError: Torch not compiled with CUDA enabled