# Adversarial samples generator
Generates the adversarial samples used to test the models.

**Authors**

[Marco Alecci](https://github.com/MarcoAlecci)

[Francesco Marchiori](https://github.com/FrancescoMarchiori)

[Luca Martinelli](https://github.com/luca-martinelli-09)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/luca-martinelli-09/orco-gan/blob/main/adversarialSamplesGenerator.ipynb)

## General Setup

In [1]:
import os

# When no ./datasets folder exists in the working directory, clone the project
# repository and change into it (presumably the repository provides the
# datasets folder -- confirm). The `!` and `%` lines are IPython shell/magic
# commands, so this cell only runs inside a notebook.
if not os.path.exists("./datasets"):
    !git clone "https://github.com/luca-martinelli-09/orco-gan.git"

    %cd orco-gan/

In [2]:
import sys
# True when the google.colab module is already loaded in this interpreter
IN_COLAB = 'google.colab' in sys.modules

# Maps a dataset key to the sub-folder of the models directory that is walked
# for that dataset's saved models
datasetToFolder = {"ddg": "ddg", "bing": "bing", "google": "google"}
# Stays None outside Colab; modelsDir then falls back to the local ./models
googleModelsDir = None

if IN_COLAB:
  # torchattacks is installed on the fly (IPython shell command)
  !pip install torchattacks

  from google.colab import drive
  drive.mount('/content/drive')

  # Hard-coded location of the saved models inside the mounted Drive
  googleModelsDir = "/content/drive/MyDrive/Università/Magistrale/II Anno/I Semestre/Advanced Topics in Computer and Network Security/Project/Models"
  
  # The Drive folders use capitalised names instead of the dataset keys
  datasetToFolder = {"ddg": "DuckDuckGo", "bing": "Bing", "google": "Google"}

In [3]:
import os
import time
import torch
import torchvision
import numpy as np
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image

from torchattacks import FGSM, CW, FAB, DIFGSM, DeepFool

from nonMathAttacks import NonMathAttacks

from imageLimitedDataset import ImageLimitedDataset

# Log the library versions in use
print("PyTorch Version:", torch.__version__)
print("Torchvision Version:", torchvision.__version__)

  warn(f"Failed to load image Python extension: {e}")


PyTorch Version: 1.10.1
Torchvision Version: 0.11.2


In [4]:
# Use the first CUDA device when one is visible, otherwise run on the CPU
cudaAvailable = torch.cuda.is_available()
print("CUDA available:", cudaAvailable)
device = torch.device("cuda:0") if cudaAvailable else torch.device("cpu")

CUDA available: True


## Settings

In [5]:
# Forwarded as `shuffle=` to the test DataLoader and as `shuffled=` to
# saveMathAdversarials
# @markdown Shuffle the dataset
shuffleDataset = False  # @param {type: "boolean"}

# Upper bound of the slice(0, datasetSize) handed to ImageLimitedDataset for
# the math attacks. NOTE(review): how that slice is applied (overall or per
# class) depends on ImageLimitedDataset -- confirm.
# @markdown Reduce the size of the dataset
datasetSize = 100  # @param {type: "integer"}

In [6]:
# Datasets to process: a list with any of "bing", "ddg", "google".
# A falsy value (None or []) makes the generation cells fall back to every sub-folder of datasetsDir.
datasetsGenerateOnly = ["google"] # Currently restricted to the google dataset

# Models to attack: a list with any of "alexnet", "resnet", "vgg".
# NOTE: the math-attack cell tests membership with `in`, so keep this a container; list every name to select all the models.
modelsGenerateOnly = ["vgg"] # Currently restricted to vgg

## Setup

In [7]:
# Provider of the image-level ("non-math") perturbations listed in the attacks table below
nonMathAttacks = NonMathAttacks()

In [8]:
# Output root for the generated samples and input root of the datasets
adversarialDir = "./adversarial_samples"
datasetsDir = "./datasets"
# Prefer the Drive models folder when one was configured, else the local one
modelsDir = googleModelsDir or "./models"

In [9]:
SEED = 151836


def setSeed(seed):
    """Seed the PyTorch (CPU and CUDA) and NumPy random generators with ``seed``."""
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
    )
    # Same calls, same order: each generator receives the identical seed
    for seeder in seeders:
        seeder(seed)


setSeed(SEED)


## Utils

In [10]:
def getSubDirs(dir):
    """Return the names of the entries directly inside ``dir`` that are directories."""
    subDirs = []
    for entry in os.listdir(dir):
        if os.path.isdir(os.path.join(dir, entry)):
            subDirs.append(entry)
    return subDirs

In [11]:
def getClassPercents(sizes):
    """Return each entry of ``sizes`` as a rounded integer percentage of their sum."""
    totalSize = np.array(sizes).sum()
    return [int(round((size / totalSize) * 100)) for size in sizes]


In [12]:
def saveMathAdversarials(dataloader, classes, fileNames, attack, saveDir, shuffled=False):
    """Attack every batch of ``dataloader`` and save each result as a JPEG.

    Images are written to ``<saveDir>/<class name>/<image name>``.

    Args:
        dataloader: iterable yielding ``(images, labels)`` batches.
        classes: sequence of class names, indexed by label.
        fileNames: sequence whose i-th entry has the source file path as its
            first element; only read when ``shuffled`` is False.
        attack: callable ``attack(images, labels)`` returning the adversarial
            images for the batch.
        saveDir: root folder for the generated images.
        shuffled: when True the samples no longer line up with ``fileNames``,
            so files are named after the running sample index instead.
    """
    # Converter built once instead of once per image
    toPIL = transforms.ToPILImage()

    i = 0
    for images, labels in dataloader:
        adversarials = attack(images, labels)

        for adversarial, label in zip(adversarials, labels):
            image = toPIL(adversarial).convert("RGB")
            path = os.path.join(saveDir, classes[label])
            os.makedirs(path, exist_ok=True)

            # Without shuffling, this assumes the i-th sample produced by the
            # loader is the i-th entry of fileNames and reuses its file name.
            # The index must be converted explicitly: int + str is a TypeError.
            imageName = str(i) + ".jpg" if shuffled else os.path.basename(fileNames[i][0])
            image.save(os.path.join(path, imageName), "JPEG")

            i += 1

            # Progress report every 20 saved samples
            if i % 20 == 0:
                print("Sample #", i)

## Generate adversarials

In [13]:
# One dict per timed generation run (dataset, attack, model, elapsed seconds, ...); converted to a DataFrame at the end
timesEvaluations = []

In [None]:
# Non-math attacks: display name -> callable that receives a PIL image and
# returns the perturbed image. The name is also used as the output sub-folder.
attacks = {
    "GaussianNoise": nonMathAttacks.gaussianNoise,
    "BoxBlur": nonMathAttacks.boxBlur,
    "Sharpen": nonMathAttacks.sharpen,
    "InvertColor": nonMathAttacks.invertColor,
    "GreyScale": nonMathAttacks.greyscale,
    "SplitMergeRGB": nonMathAttacks.splitMergeRGB,
    "SaltPepper": nonMathAttacks.saltAndPepper,
    "RandomBlackBox": nonMathAttacks.randomBlackBox,
}

In [None]:
# Apply every non-math attack to each test image of the selected datasets and
# store the results under <adversarialDir>/<dataset>/nonMath/<attack>/<class>/
datasetsToGenerate = datasetsGenerateOnly if datasetsGenerateOnly else getSubDirs(datasetsDir)

for dataset in datasetsToGenerate:
    print("\n" + "-" * 15)
    currentTime = time.time()
    print("[🗃️ TEST DATASET] {}".format(dataset))

    # Clean images are read from <datasetsDir>/<dataset>/test
    datasetDir = os.path.join(datasetsDir, dataset)
    testDir = os.path.join(datasetDir, "test")

    # Perturbed copies go below <adversarialDir>/<dataset>/nonMath
    datasetAdvDir = os.path.join(adversarialDir, dataset)
    nonMathAttacksDir = os.path.join(datasetAdvDir, "nonMath")
    if not os.path.exists(nonMathAttacksDir):
        os.makedirs(nonMathAttacksDir)

    testDataset = ImageLimitedDataset(testDir, use_cache=True, check_images=False)

    for path, cls in testDataset.imgs:
        clsName = testDataset.classes[cls]
        imageName = os.path.basename(path)
        image = Image.open(path).convert("RGB")

        for attack, attacker in attacks.items():
            attackDir = os.path.join(nonMathAttacksDir, attack)
            saveDir = os.path.join(attackDir, clsName)
            if not os.path.exists(saveDir):
                os.makedirs(saveDir)

            # Every attack gets its own copy so the loaded image is reused as-is
            outImage = attacker(image.copy())
            outImage.save(os.path.join(saveDir, imageName), "JPEG")

    # A single timing record covers all non-math attacks of the dataset
    elapsedTime = time.time() - currentTime
    print("Elapsed seconds:", elapsedTime)
    timesEvaluations.append({
        "dataset": dataset,
        "math": False,
        "attack": None,
        "model": None,
        "modelDataset": None,
        "balancing": None,
        "time": elapsedTime,
    })

In [14]:
# Generate the gradient-based ("math") adversarial samples: for every selected
# dataset, each saved model found under the matching models folder is attacked
# with every torchattacks method, and the images are written below
# <adversarialDir>/<dataset>/math/<attack>/<model>/<balancing>/.
datasetsToGenerate = getSubDirs(datasetsDir) if not datasetsGenerateOnly else datasetsGenerateOnly

# The transform does not depend on the dataset, so it is built once
toTensor = transforms.Compose([transforms.ToTensor()])

for dataset in datasetsToGenerate:
    print("\n" + "-" * 15)
    print("[🗃️ SOURCE DATASET] {}".format(dataset))

    datasetDir = os.path.join(datasetsDir, dataset)
    testDir = os.path.join(datasetDir, "test")

    datasetAdvDir = os.path.join(adversarialDir, dataset)
    mathAttacksDir = os.path.join(datasetAdvDir, "math")
    os.makedirs(mathAttacksDir, exist_ok=True)

    testDataset = ImageLimitedDataset(
        testDir, transform=toTensor, slices=[slice(0, datasetSize)], use_cache=False, check_images=False)

    # Re-seed before building the loader and again before every attack run
    setSeed(SEED)
    testDataLoader = DataLoader(
        testDataset, batch_size=16, num_workers=0, shuffle=shuffleDataset)

    for root, _, fnames in sorted(os.walk(os.path.join(modelsDir, datasetToFolder[dataset]), followlinks=True)):
        for fname in sorted(fnames):
            path = os.path.join(root, fname)

            # NOTE(security): torch.load unpickles the file, which can execute
            # arbitrary code -- only point modelsDir at trusted checkpoints.
            try:
                modelData = torch.load(path)
            except Exception as error:
                # Best effort: every file below the folder is tried, so files
                # that are not loadable checkpoints are reported and skipped.
                # Catching Exception (not a bare except) keeps Ctrl+C working.
                print("Skipping {}: {}".format(path, error))
                continue

            modelDataset = modelData["dataset"]
            modelName = modelData["model_name"]

            # A filter of None selects every model instead of raising TypeError
            if modelsGenerateOnly is not None and modelName not in modelsGenerateOnly:
                torch.cuda.empty_cache()
                continue

            # Class balancing of the model's training set, e.g. "20_80"
            modelPercents = "_".join([str(x)
                                      for x in getClassPercents(modelData["dataset_sizes"])])
            model = modelData["model"].to(device)

            # Kept under its own name so the non-math `attacks` table defined
            # earlier is not overwritten when this cell runs.
            mathAttacks = {
                "FGSM": FGSM(model, eps=8/255),
                "CW": CW(model, c=1, lr=0.01, steps=100, kappa=0),
                "FAB": FAB(model, eps=8/255, steps=100, n_classes=10, n_restarts=1, targeted=True),
                "DIFGSM": DIFGSM(model, eps=8/255, alpha=2/255, steps=100, diversity_prob=0.5, resize_rate=0.9),
                "DeepFool": DeepFool(model, steps=100),
            }

            for attack, attacker in mathAttacks.items():
                attackDir = os.path.join(mathAttacksDir, attack)
                saveDir = os.path.join(attackDir, modelName, modelPercents)
                os.makedirs(saveDir, exist_ok=True)

                currentTime = time.time()
                print("[⚔️ ADVERSARIAL] {} - {} - {} {}".format(
                    attack,
                    modelDataset,
                    modelName,
                    modelPercents
                ))

                setSeed(SEED)
                saveMathAdversarials(testDataLoader, testDataset.classes,
                                     testDataset.imgs, attacker, saveDir, shuffled=shuffleDataset)

                # One timing record per (dataset, model, attack) run
                elapsedTime = time.time() - currentTime
                print("Elapsed seconds:", elapsedTime)
                timesEvaluations.append({
                    "dataset": dataset,
                    "math": True,
                    "attack": attack,
                    "model": modelName,
                    "modelDataset": modelDataset,
                    "balancing": modelPercents.replace("_", "/"),
                    "time": elapsedTime,
                })

                torch.cuda.empty_cache()


---------------
[🗃️ SOURCE DATASET] google
[⚔️ ADVERSARIAL] FGSM - google - vgg 20_80
Sample # 20
Sample # 40
Sample # 60
Sample # 80
Sample # 100
Sample # 120
Sample # 140
Sample # 160
Sample # 180
Sample # 200
Elapsed seconds: 5.192724943161011
[⚔️ ADVERSARIAL] CW - google - vgg 20_80
Sample # 20
Sample # 40
Sample # 60
Sample # 80
Sample # 100
Sample # 120
Sample # 140
Sample # 160
Sample # 180
Sample # 200
Elapsed seconds: 80.07771897315979
[⚔️ ADVERSARIAL] FAB - google - vgg 20_80
Sample # 20
Sample # 40
Sample # 60
Sample # 80
Sample # 100
Sample # 120
Sample # 140
Sample # 160
Sample # 180
Sample # 200
Elapsed seconds: 307.9132173061371
[⚔️ ADVERSARIAL] DIFGSM - google - vgg 20_80
Sample # 20
Sample # 40
Sample # 60
Sample # 80
Sample # 100
Sample # 120
Sample # 140
Sample # 160
Sample # 180
Sample # 200
Elapsed seconds: 130.43215894699097
[⚔️ ADVERSARIAL] DeepFool - google - vgg 20_80
Sample # 20
Sample # 40
Sample # 60
Sample # 80
Sample # 100
Sample # 120
Sample # 140
Sample

In [None]:
import pandas as pd

timesEvaluationsDF = pd.DataFrame(timesEvaluations)

In [None]:
timesEvaluationsDF

In [None]:
timesEvaluationsDF.to_csv("timesEvaluations.csv")