# Adversarial Training
A first approach to making the CNN model more robust to gradient-based adversarial attacks. <br>
It consists of fine-tuning the pretrained model on both clean and adversarial examples. <br>

In [1]:
import os
import numpy as np
import torch
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from collections import Counter
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import utils
import random

# reproducibility
def set_seed(seed):
    """Seed every random number generator used in this notebook.

    Applies the same seed to Python's ``random`` module, NumPy and PyTorch
    (CPU and CUDA generators), and configures cuDNN for deterministic
    execution with autotuning disabled.

    Args:
        seed: integer seed shared by all generators.
    """
    # cuDNN: request deterministic kernels and disable the benchmark autotuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Each seeding function takes the seed as its only argument.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

set_seed(42)

### Preprocessing

In [2]:
# Baseline pipeline without normalization: resize to 224x224 and convert to a tensor.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Dataset root; the "Training" and "Testing" subfolders are read below.
dataset_folder = "mri_brain_tumor"

# ImageFolder datasets built from the two subfolders, using the un-normalized transform.
train_dataset = datasets.ImageFolder(root=f'{dataset_folder}/Training', transform=transform)
test_dataset = datasets.ImageFolder(root=f'{dataset_folder}/Testing', transform=transform)

# NOTE(review): these un-normalized loaders appear to exist only to feed the
# (commented-out) utils.compute_mean_std call in the next cell; the datasets and
# loaders are rebuilt with normalization afterwards — confirm.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=1)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=1)

In [3]:
# Per-channel normalization statistics, hardcoded to save time.
# They can presumably be recomputed with: utils.compute_mean_std(train_loader)
mean = torch.full((3,), 0.1855)
std = torch.full((3,), 0.1813)

print(f"Mean: {mean}")
print(f"Std: {std}")

Mean: tensor([0.1855, 0.1855, 0.1855])
Std: tensor([0.1813, 0.1813, 0.1813])


In [4]:
# Training pipeline: random augmentation followed by resize, tensor conversion and normalization
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),     # Randomly flip the image horizontally
    transforms.RandomRotation(10),         # Rotate by a random angle in [-10, +10] degrees
    transforms.Resize((224, 224)),         # Resize the image to 224x224
    transforms.ToTensor(),                 # Convert the image to a tensor
    transforms.Normalize(mean=mean, std=std)  # Normalize with the precomputed mean/std
])

# Testing pipeline: same as training but without the random augmentation
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),         # Resize the image to 224x224
    transforms.ToTensor(),                 # Convert the image to a tensor
    transforms.Normalize(mean=mean, std=std)  # Normalize with the precomputed mean/std
])

# Rebuild the datasets with the normalized transforms (replaces the earlier un-normalized ones)
train_dataset = datasets.ImageFolder(root=f'{dataset_folder}/Training', transform=train_transform)
test_dataset = datasets.ImageFolder(root=f'{dataset_folder}/Testing', transform=test_transform)


# NOTE(review): the test loader is shuffled here, unlike the one built in the earlier cell.
# Ordering does not affect full-dataset metrics, but it matters if the evaluation helper
# only consumes part of the loader — confirm this is intended.
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=1)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=True, num_workers=1)

### Loading pretrained model

In [5]:
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device.")

model = utils.CNN(num_classes=4).to(device)
# Load the pretrained weights.
# - map_location=device remaps the stored tensors onto the selected device, so a
#   checkpoint saved on a GPU can still be loaded when running on the CPU fallback.
# - weights_only=True restricts unpickling to tensors and plain containers, which is
#   all a state_dict needs and avoids executing arbitrary pickled code.
model.load_state_dict(torch.load('weights/cnn.pth', map_location=device, weights_only=True))
total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {total_params}")
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)


Using cuda device.


  model.load_state_dict(torch.load('weights/cnn.pth'))


Total number of parameters: 51476484


## Adversarial train function
Use both original and noisy images to train the model. <br>
We also apply image augmentation techniques to make the model more robust to adversarial attacks. <br>


In [6]:

def adversarial_train(dataloader, model, device, num_epochs, optimizer, criterion, attack_function, attack_params):
    """
    Fine-tune the model on clean batches augmented with their adversarial copies.

    For every batch, adversarial images are generated with the attack function and
    concatenated to the clean images (the labels are duplicated accordingly), so each
    optimization step sees both versions of every sample. The average loss per batch
    is printed at the end of each epoch.

    Args:
    - dataloader: the dataloader for the dataset, yielding (images, labels) batches
    - model: the model to train (its parameters are updated in place)
    - device: the device to train on
    - num_epochs: the number of epochs to train
    - optimizer: the optimizer that updates the model parameters
    - criterion: the loss function; it is also passed to the attack function
    - attack_function: generates the adversarial images (can be fgsm or pgd); it is called as
      attack_function(model, criterion, images, labels, device, **attack_params)
    - attack_params: method-specific parameters for the attack function (dictionary)

    Returns:
    - None
    """

    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0  # counted while iterating, so an empty loader cannot divide by zero

        for images, labels in tqdm(dataloader, desc=f"Epoch [{epoch+1}/{num_epochs}]"):

            images, labels = images.to(device), labels.to(device)
            # The first arguments are common to every attack, the rest are method-specific.
            attack_images = attack_function(model, criterion, images, labels, device, **attack_params)

            # The attack function is opaque here: make sure the model is back in
            # training mode in case the attack switched it to eval mode.
            model.train()

            # Train on the clean batch followed by its adversarial copy. Both tensors are
            # detached so the training graph starts at the model input and does not extend
            # into whatever graph the attack built to compute its input gradients.
            images = torch.cat((images.detach(), attack_images.detach()), 0)
            labels = torch.cat((labels, labels), 0)

            # Also clears any parameter gradients left behind by the attack's backward pass.
            optimizer.zero_grad()

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        epoch_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Loss: {epoch_loss:.4f}")

    print("Training complete.")

# FGSM
Let's train the model with FGSM adversarial examples. <br>

In [7]:
# Fine-tune a fresh instance of the CNN, initialized from the pretrained weights.
robust_model = utils.CNN(num_classes=4).to(device)
# map_location keeps the load working when the checkpoint was saved on another device;
# weights_only limits unpickling to tensors and plain containers.
robust_model.load_state_dict(torch.load('weights/cnn.pth', map_location=device, weights_only=True))
optimizer = optim.Adam(robust_model.parameters(), lr=0.0001)
criterion=nn.CrossEntropyLoss()
# FGSM adversarial examples with epsilon = 0.1, trained for 5 epochs.
attack_function = utils.fgsm_attack
attack_params = {"epsilon": 0.1}
adversarial_train(train_loader, robust_model, device, 5, optimizer, criterion,
                  attack_function, attack_params)

  robust_model.load_state_dict(torch.load('weights/cnn.pth'))
Epoch [1/5]: 100%|██████████| 714/714 [01:38<00:00,  7.25it/s]


Loss: 0.5195


Epoch [2/5]: 100%|██████████| 714/714 [01:40<00:00,  7.13it/s]


Loss: 0.2834


Epoch [3/5]: 100%|██████████| 714/714 [01:41<00:00,  7.02it/s]


Loss: 0.1910


Epoch [4/5]: 100%|██████████| 714/714 [01:36<00:00,  7.40it/s]


Loss: 0.1330


Epoch [5/5]: 100%|██████████| 714/714 [01:33<00:00,  7.61it/s]

Loss: 0.1148
Training complete.





In [8]:
# Persist the FGSM-trained weights so the model can be reloaded without retraining.
torch.save(robust_model.state_dict(),'weights/robust_fgsm.pth')

## Evaluate

In [10]:
# Compare clean and FGSM accuracy of the FGSM-trained model over several epsilon values.
epsilon_values = [0.1, 0.01, 0.005, 0.002, 0.001]
for eps in epsilon_values:
    print("\n\n--------- EPSILON VALUE: ", eps, " ---------")
    utils.compare_eval(
        robust_model,
        test_loader,
        criterion,
        device,
        attack_function=utils.fgsm_attack,
        attack_params={"epsilon": eps},
    )



--------- EPSILON VALUE:  0.1  ---------

ROBUST MODEL


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  4.81it/s]


Original accuracy: 	0.95
Adversarial accuaracy: 	0.96


--------- EPSILON VALUE:  0.01  ---------

ROBUST MODEL


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  4.88it/s]


Original accuracy: 	0.96
Adversarial accuaracy: 	0.70


--------- EPSILON VALUE:  0.005  ---------

ROBUST MODEL


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  5.06it/s]


Original accuracy: 	0.95
Adversarial accuaracy: 	0.75


--------- EPSILON VALUE:  0.002  ---------

ROBUST MODEL


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  5.09it/s]


Original accuracy: 	0.96
Adversarial accuaracy: 	0.79


--------- EPSILON VALUE:  0.001  ---------

ROBUST MODEL


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  5.02it/s]

Original accuracy: 	0.96
Adversarial accuaracy: 	0.81





# PGD - Projected Gradient Descent
PGD is a more advanced attack than FGSM. It still uses the gradient of the loss function to generate adversarial examples, but it applies several small perturbation steps to the input image instead of a single one. <br>
Since it works similarly to FGSM, we can reuse the same adversarial training approach to make the model more robust to PGD attacks, this time training on PGD adversarial examples. <br>


In [11]:
# Fine-tune a fresh instance of the CNN, initialized from the pretrained weights.
robust_model_pdg = utils.CNN(num_classes=4).to(device)
# map_location keeps the load working when the checkpoint was saved on another device;
# weights_only limits unpickling to tensors and plain containers.
robust_model_pdg.load_state_dict(torch.load('weights/cnn.pth', map_location=device, weights_only=True))
optimizer = optim.Adam(robust_model_pdg.parameters(), lr=0.0001)
criterion=nn.CrossEntropyLoss()
# PGD adversarial examples: eps = 0.01, alpha = 0.001, 5 iterations; trained for 5 epochs.
pgd = utils.pgd_attack
pgd_params = {"eps":0.01, "alpha":0.001, "num_iter":5}
adversarial_train(train_loader, robust_model_pdg, device, 5, optimizer, criterion,
                  pgd, pgd_params)

  robust_model_pdg.load_state_dict(torch.load('weights/cnn.pth'))
Epoch [1/5]: 100%|██████████| 714/714 [03:30<00:00,  3.39it/s]


Loss: 0.3478


Epoch [2/5]: 100%|██████████| 714/714 [03:34<00:00,  3.32it/s]


Loss: 0.2378


Epoch [3/5]: 100%|██████████| 714/714 [03:31<00:00,  3.38it/s]


Loss: 0.1993


Epoch [4/5]: 100%|██████████| 714/714 [03:32<00:00,  3.37it/s]


Loss: 0.1694


Epoch [5/5]: 100%|██████████| 714/714 [03:28<00:00,  3.42it/s]

Loss: 0.1415
Training complete.





In [12]:
# Persist the PGD-trained weights so the model can be reloaded without retraining.
torch.save(robust_model_pdg.state_dict(),'weights/robust_pgd.pth')
# To skip the training cell, reload them instead with:
#robust_model_pdg.load_state_dict(torch.load('weights/robust_pgd.pth'))

### Evaluate PGD
We keep epsilon fixed at 0.01 and vary alpha.

In [13]:
# Compare clean and PGD accuracy of the PGD-trained model for several alpha values
# (eps and num_iter are held fixed).
alphas = [0.001, 0.0005, 0.0001]
for alpha in alphas:
    print("\n\n--------- ALPHA VALUE: ", alpha, " ---------")
    utils.compare_eval(
        robust_model_pdg,
        test_loader,
        criterion,
        device,
        attack_function=utils.pgd_attack,
        attack_params={"eps": 0.01, "alpha": alpha, "num_iter": 5},
    )




--------- ALPHA VALUE:  0.001  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:11<00:00,  2.67it/s]


Original accuracy: 	0.97
Adversarial accuaracy: 	0.91


--------- ALPHA VALUE:  0.0005  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:11<00:00,  2.63it/s]


Original accuracy: 	0.97
Adversarial accuaracy: 	0.95


--------- ALPHA VALUE:  0.0001  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:11<00:00,  2.66it/s]

Original accuracy: 	0.98
Adversarial accuaracy: 	0.94





# Cross evaluations
Let's evaluate the model trained with FGSM adversarial examples using PGD adversarial examples, and vice-versa. <br>
This way we can see how the techniques generalize to different types of adversarial attacks. <br>
Each version has its pros and cons:
- FGSM is faster to compute, but less effective than PGD.
- PGD is more effective, but slower to compute.

### Evaluate FGSM using the PGD-trained model

In [16]:
# Cross-evaluation: attack the PGD-trained model with FGSM over several epsilon values.
fgsm = utils.fgsm_attack
epsilon_values = [0.1, 0.01, 0.005, 0.002, 0.001]
for eps in epsilon_values:
    print("\n\n--------- EPSILON VALUE: ", eps, " ---------")
    utils.compare_eval(
        robust_model_pdg,
        test_loader,
        criterion,
        device,
        fgsm,
        attack_params={"epsilon": eps},
    )



--------- EPSILON VALUE:  0.1  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  4.86it/s]


Original accuracy: 	0.95
Adversarial accuaracy: 	0.54


--------- EPSILON VALUE:  0.01  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  5.04it/s]


Original accuracy: 	0.95
Adversarial accuaracy: 	0.91


--------- EPSILON VALUE:  0.005  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  4.91it/s]


Original accuracy: 	0.94
Adversarial accuaracy: 	0.88


--------- EPSILON VALUE:  0.002  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  5.08it/s]


Original accuracy: 	0.96
Adversarial accuaracy: 	0.92


--------- EPSILON VALUE:  0.001  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:06<00:00,  5.14it/s]

Original accuracy: 	0.96
Adversarial accuaracy: 	0.91





### Evaluate PGD using the FGSM-trained model

In [18]:
# Cross-evaluation: attack the FGSM-trained model with PGD for several alpha values
# (eps and num_iter are held fixed).
alphas = [0.001, 0.0005, 0.0001]
for alpha in alphas:
    print("\n\n--------- ALPHA VALUE: ", alpha, " ---------")
    utils.compare_eval(
        robust_model,
        test_loader,
        criterion,
        device,
        attack_function=utils.pgd_attack,
        attack_params={"eps": 0.01, "alpha": alpha, "num_iter": 5},
    )




--------- ALPHA VALUE:  0.001  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:11<00:00,  2.76it/s]


Original accuracy: 	0.96
Adversarial accuaracy: 	0.73


--------- ALPHA VALUE:  0.0005  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:11<00:00,  2.75it/s]


Original accuracy: 	0.96
Adversarial accuaracy: 	0.75


--------- ALPHA VALUE:  0.0001  ---------


Testing Progress:  97%|█████████▋| 31/32 [00:11<00:00,  2.78it/s]

Original accuracy: 	0.96
Adversarial accuaracy: 	0.85



