# ITS52000 - Applied Machine Learning Final Project by 

#### Rubabul Karim (karim12@pnw.edu), 
#### Rusaful Karim (karim11@pnw.edu), 
#### Wahaj (mwahajud@pnw.edu), 
#### Mohammed Al Hamad (hamad@pnw.edu), 

### To - Prof. Ricardo Calix, Ph.D (rcalix@pnw.edu)


This project is an elaborate demonstration of how the accuracy of datasets can be manipulated by attacking it with morphed data from the dataset and training the dataset with these 'adversarial images'. This report will include the following:

b) A ML baseline for image classification.

c) The Performance Metrics of both before and after the adversarial attack has been done.

d) A demonstration of adding adversarial images to the training process to try and manipulate the end result. 

e) A conclusive display of how many adversarial images (Target: 5%) are needed to lower performance significantly.

-------------------------------------------------------------------------------------------------------------------

### Installing packages

In [1]:
import torch
import numpy as np
## import imageio
import os

import torch.optim as optim
import torch.nn as nn

from torchvision import datasets
from torchvision import transforms
import tensorflow as tf

from PIL import Image
import cv2
import sklearn

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score, accuracy_score, f1_score


### Dataset downloading and Data Augmentation

In [2]:
data_path = 'data/MNISTdata/'

train = datasets.MNIST(data_path, train=True, download=True)
test = datasets.MNIST(data_path, train=False, download=True)

print(train)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNISTdata/MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting data/MNISTdata/MNIST\raw\train-images-idx3-ubyte.gz to data/MNISTdata/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNISTdata/MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting data/MNISTdata/MNIST\raw\train-labels-idx1-ubyte.gz to data/MNISTdata/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNISTdata/MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting data/MNISTdata/MNIST\raw\t10k-images-idx3-ubyte.gz to data/MNISTdata/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNISTdata/MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting data/MNISTdata/MNIST\raw\t10k-labels-idx1-ubyte.gz to data/MNISTdata/MNIST\raw

Dataset MNIST
    Number of datapoints: 60000
    Root location: data/MNISTdata/
    Split: Train


In [3]:
print(test)

Dataset MNIST
    Number of datapoints: 10000
    Root location: data/MNISTdata/
    Split: Test


In [4]:
img, label = train[79]

In [5]:
print(   dir(transforms)   )

['AutoAugment', 'AutoAugmentPolicy', 'CenterCrop', 'ColorJitter', 'Compose', 'ConvertImageDtype', 'FiveCrop', 'GaussianBlur', 'Grayscale', 'InterpolationMode', 'Lambda', 'LinearTransformation', 'Normalize', 'PILToTensor', 'Pad', 'RandAugment', 'RandomAdjustSharpness', 'RandomAffine', 'RandomApply', 'RandomAutocontrast', 'RandomChoice', 'RandomCrop', 'RandomEqualize', 'RandomErasing', 'RandomGrayscale', 'RandomHorizontalFlip', 'RandomInvert', 'RandomOrder', 'RandomPerspective', 'RandomPosterize', 'RandomResizedCrop', 'RandomRotation', 'RandomSolarize', 'RandomVerticalFlip', 'Resize', 'TenCrop', 'ToPILImage', 'ToTensor', 'TrivialAugmentWide', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'autoaugment', 'functional', 'functional_pil', 'functional_tensor', 'transforms']


In [6]:
to_tensor = transforms.ToTensor()
img_t = to_tensor(img)

print(img_t.shape)


torch.Size([1, 28, 28])


In [7]:
tensor_train = datasets.MNIST(data_path, train=True, download=False, transform=transforms.ToTensor())

In [8]:
img_t , _ = tensor_train[99]

### Normalize

In [9]:
imgs_list = [ img_t  for img_t, _ in tensor_train ]

imgs = torch.stack( imgs_list, dim=3 )

print(   imgs.shape   )

torch.Size([1, 28, 28, 60000])


In [10]:
view1 = imgs.view(1, -1)
print(view1.shape)
view1 = view1.mean(dim=1)
print(view1)

torch.Size([1, 47040000])
tensor([0.1307])


In [11]:
view2 = imgs.view(1, -1).std(dim=1)
print(view2)

tensor([0.3081])


In [12]:
transformed_train = datasets.MNIST(data_path, train=True, download= False,
                                      transform = transforms.Compose([
                                          transforms.ToTensor(),
                                          transforms.Normalize(view1, view2)
                                      ]))


transformed_test = datasets.MNIST(data_path, train=False, download= False,
                                      transform = transforms.Compose([
                                          transforms.ToTensor(),
                                          transforms.Normalize(view1, view2)
                                      ]))

In [13]:
#label_map = {0:0, 2:1}


     

#class_names = ['airplane', 'bird']


     

#cifar2 = [  (img, label_map[label])  for img, label in transformed_cifar10 if label in [0, 2]    ]


     

#cifar2_val = [  (img, label_map[label])  for img, label in transformed_cifar10_val if label in [0, 2]    ]


### Softmax

In [14]:
def softmax(x):
    return torch.exp(x) / torch.exp(x).sum()

### Architectures

In [15]:
model_2DL = nn.Sequential(
     nn.Linear(784, 512),
     nn.ReLU(),                  ## nn.GeLU()
     nn.Linear(512, 128),
     nn.ReLU(),
     nn.Linear(128, 64),
     nn.ReLU(),
     nn.Linear(64, 10),
     nn.LogSoftmax(dim=1)
)
#opt = optim.SGD(model.parameters(), lr=LEARNING_RATE, weight_decay=DECAY)

### Loss Function

In [16]:
loss_fn = nn.CrossEntropyLoss()

### DataLoader

In [17]:
train_loader = torch.utils.data.DataLoader(transformed_train, batch_size=64, shuffle=True)

## model_fn = model_mlp
model_fn = model_2DL


### Training process

In [18]:
learning_rate = 0.0001  ## 1e-2    ## 0.001

n_epochs = 100

## optimizer = optim.SGD(  model_fn.parameters(), lr=learning_rate )
optimizer = optim.Adam(  model_fn.parameters(), lr=learning_rate )


In [20]:
for epoch in range(n_epochs):
    for imgs, labels in train_loader:       
        ## print(imgs.shape)
        ## resize for network
        batch_size = imgs.shape[0]
        imgs_resized = imgs.view(batch_size, -1)  
        ## print(imgs_resized.shape)
        outputs = model_fn(imgs_resized)
        loss = loss_fn(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(loss)


tensor(0.0774, grad_fn=<NllLossBackward0>)
tensor(0.2618, grad_fn=<NllLossBackward0>)


KeyboardInterrupt: 

### All performance metrics

In [21]:
print(len(transformed_test))

10000


In [22]:
val_loader = torch.utils.data.DataLoader(   transformed_test, batch_size=10000, shuffle=False  )

In [23]:
def print_stats_percentage_train_test(algorithm_name, y_test, y_pred):    
     print("------------------------------------------------------")
     print("------------------------------------------------------")
    
     print("algorithm is: ", algorithm_name)
        
     print('Accuracy: %.2f' % accuracy_score(y_test,   y_pred) )
     
     confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
     print("confusion matrix")
     print(confmat)
     print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, average='weighted'))
     print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, average='weighted'))
     print('F1-measure: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, average='weighted'))
   

In [24]:

with torch.no_grad():
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]
        outputs    = model_fn(    imgs.view(batch_size, -1)     )
        _, preds = torch.max(  outputs, dim=1  )
        print_stats_percentage_train_test("2DL", labels, preds)

------------------------------------------------------
------------------------------------------------------
algorithm is:  2DL
Accuracy: 0.97
confusion matrix
[[ 969    0    0    1    1    3    2    1    3    0]
 [   0 1120    3    0    0    1    4    1    6    0]
 [   6    1  999    4    3    0    3    6   10    0]
 [   0    1    1  997    0    3    0    2    5    1]
 [   2    0    4    0  960    0    3    1    2   10]
 [   4    0    0    7    2  863    6    0    8    2]
 [   7    3    0    0    3    4  936    0    5    0]
 [   1    8   12    3    1    1    0  993    5    4]
 [   3    0    2   10    3    3    4    3  944    2]
 [   4    5    0    9   15    2    0    9    9  956]]
Precision: 0.974
Recall: 0.974
F1-measure: 0.974


1. We need to read all the images in the correct folder.
2. We need to convert those images into numpy array -> and then into torch tensors. (torch.from_numpy)
3. We need to upload these tensors from the correct folder in to the correct array [train, test]
4. Re-run the code.

In [25]:
folder_path = "data/AdversarialFolder/trainingSet/trainingSet/1"

#for img in folder_path:
#    images = [cv2.imread(img),cv2.IMREAD_GRAYSCALE]# for img in folder_path]
#train = preprocessing.normalize([images])
#t = 
for img in folder_path:
    try:
        image = Image.open(img)
        t.append(np.array(image))
        tr = tr.append(torch.from_numpy(t))
    except Exception:
        pass

print(tr)

NameError: name 'tr' is not defined

In [26]:
#transform = transforms.Compose([
#                                transforms.ToTensor(),
#                                transforms.Normalize(view1, view2)
#                                ])
#tr = np.asarray(images)
#train = transform(tr)
#print(images)
#print(train.shape)

In [27]:
folder_path = "data/AdversarialFolder/testSet/testSet/"
for img in folder_path:
    try:
        image = Image.open(img)
        t_test = t.append(np.array(img))
        test = test.append(torch.from_numpy(t_test))
    except Exception:
        pass

print(test)

Dataset MNIST
    Number of datapoints: 10000
    Root location: data/MNISTdata/
    Split: Test


In [28]:
img, label = train[3]

In [29]:
img_t = to_tensor(img)
print(img_t.shape)

torch.Size([1, 28, 28])


In [30]:
#tensor_train = datasets.MNIST(data_path, train=True, download=False, transform=transforms.ToTensor())

for i in train:
    try:
        img, label = train[i]
        img_t=to_tensor(img)
        tensor_train= tensor_train.append(img_t)
    except Exception:
        pass

In [31]:
img_t , _ = tensor_train[99]

In [32]:
imgs_list = [ img_t  for img_t, _ in tensor_train ]

imgs = torch.stack( imgs_list, dim=3 )

print(   imgs.shape   )

torch.Size([1, 28, 28, 60000])


In [33]:
view1 = imgs.view(1, -1)
print(view1.shape)
view1 = view1.mean(dim=1)
print(view1)

torch.Size([1, 47040000])
tensor([0.1307])


In [34]:
view2 = imgs.view(1, -1).std(dim=1)
print(view2)

tensor([0.3081])


In [35]:
#train_loader = torch.utils.data.DataLoader(transformed_train, batch_size=64, shuffle=True)
train_loader = torch.utils.data.DataLoader(tensor_train, batch_size=64, shuffle=True)

## model_fn = model_mlp
model_fn = model_2DL

In [36]:
learning_rate = 0.0001  ## 1e-2    ## 0.001

n_epochs = 100

## optimizer = optim.SGD(  model_fn.parameters(), lr=learning_rate )
optimizer = optim.Adam(  model_fn.parameters(), lr=learning_rate )


In [37]:
for epoch in range(n_epochs):
    for imgs, labels in train_loader:       
        ## print(imgs.shape)
        ## resize for network
        batch_size = imgs.shape[0]
        imgs_resized = imgs.view(batch_size, -1)  
        ## print(imgs_resized.shape)
        outputs = model_fn(imgs_resized)
        loss = loss_fn(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(loss)


tensor(0.0742, grad_fn=<NllLossBackward0>)
tensor(0.0080, grad_fn=<NllLossBackward0>)
tensor(0.0438, grad_fn=<NllLossBackward0>)


KeyboardInterrupt: 

In [38]:
print(len(tensor_train))

60000


In [39]:
val_loader = torch.utils.data.DataLoader( tensor_train, batch_size=60000, shuffle=False  )

In [40]:
def print_stats_percentage_train_test(algorithm_name, y_test, y_pred):    
     print("------------------------------------------------------")
     print("------------------------------------------------------")
    
     print("algorithm is: ", algorithm_name)
        
     print('Accuracy: %.2f' % accuracy_score(y_test,   y_pred) )
     
     confmat = confusion_matrix(y_true=y_test, y_pred=y_pred)
     print("confusion matrix")
     print(confmat)
     print('Precision: %.3f' % precision_score(y_true=y_test, y_pred=y_pred, average='weighted'))
     print('Recall: %.3f' % recall_score(y_true=y_test, y_pred=y_pred, average='weighted'))
     print('F1-measure: %.3f' % f1_score(y_true=y_test, y_pred=y_pred, average='weighted'))
   

In [41]:

with torch.no_grad():
    for imgs, labels in val_loader:
        batch_size = imgs.shape[0]
        outputs    = model_fn(    imgs.view(batch_size, -1)     )
        _, preds = torch.max(  outputs, dim=1  )
        print_stats_percentage_train_test("2DL", labels, preds)

------------------------------------------------------
------------------------------------------------------
algorithm is:  2DL
Accuracy: 0.98
confusion matrix
[[5884    1    2    0    1    2   19    1   10    3]
 [   1 6673   14   11    8    0   10   12   12    1]
 [  21   10 5863   13   11    1    9   14   14    2]
 [   4    2   37 5994    3   37    0   15   27   12]
 [   1    9    3    1 5779    1   23    2    4   19]
 [   9    4    2   22    4 5314   45    1   12    8]
 [   9    5    0    0    4    3 5892    0    5    0]
 [   5   19   31    4   14    0    1 6174    3   14]
 [  12   24    8   22    7   12   23    2 5731   10]
 [  16   10    0   25   52    9    2   34   21 5780]]
Precision: 0.985
Recall: 0.985
F1-measure: 0.985
