In [1]:
import os

import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from torch import nn
from torch.utils.data import random_split, Subset, DataLoader
from torchmetrics import Metric

from torchvision import models, datasets, transforms
from tqdm import tqdm

In [2]:
class ImageNet10DataModule(pl.LightningDataModule):
    def __init__(
        self,
        dataset_dir=os.path.join(".", "imagenet-10-dataset"),
        batch_size=16,
        num_workers=4,
        train_test_split=0.8
    ):
        super(ImageNet10DataModule, self).__init__()
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.train_test_split = train_test_split
        
    def setup(self, stage=None):
        self.train_transforms = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomCrop(240),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        
        self.test_transforms = transforms.Compose([
            transforms.Resize((240, 240)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        
        full_dataset = datasets.ImageFolder(root=self.dataset_dir)
        
        n_total = len(full_dataset)
        n_train = int(self.train_test_split * n_total)
        n_test = n_total - n_train
        
        train_subset, test_subset = random_split(full_dataset, [n_train, n_test])

        train_indices = train_subset.indices
        test_indices = test_subset.indices

        train_dataset = datasets.ImageFolder(root=self.dataset_dir, transform=self.train_transforms)
        test_dataset = datasets.ImageFolder(root=self.dataset_dir, transform=self.test_transforms)

        self.train_dataset = Subset(train_dataset, train_indices)
        self.test_dataset = Subset(test_dataset, test_indices)

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
            persistent_workers=True,
            pin_memory=True
        )

    def val_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            persistent_workers=True,
            pin_memory=True
        )
    
    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
            persistent_workers=True,
            pin_memory=True
        )

In [3]:
class EfficientNetLightningModel(pl.LightningModule):
    def __init__(self, n_classes=10):
        super(EfficientNetLightningModel, self).__init__()
        self.model = models.efficientnet_b1(weights=models.EfficientNet_B1_Weights.IMAGENET1K_V2)
        self.model.classifier[1] = nn.Linear(self.model.classifier[1].in_features, n_classes)
        self.criterion = nn.CrossEntropyLoss()
        
    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        logits = self(inputs)
        loss = self.criterion(logits, labels)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        
        return loss
    
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        logits = self(images)
        loss = self.criterion(logits, labels)
        predictions = torch.argmax(logits, dim=1)
        acc = torch.sum(predictions == labels.data).item() / len(labels)

        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)
        
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)

In [157]:
model = EfficientNetLightningModel()

In [158]:
data_module = ImageNet10DataModule()

In [None]:
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    dirpath="./checkpoints",
    filename="best-checkpoint",
    save_top_k=1,
    mode="min"
)

trainer = pl.Trainer(
    max_epochs=25,
    devices=1,
    accelerator="gpu",
    callbacks=[checkpoint_callback]
)

trainer.fit(model, data_module)

In [6]:
best_model = EfficientNetLightningModel.load_from_checkpoint("./checkpoints/best-checkpoint-v1.ckpt")
best_model = best_model.to("cuda")

In [None]:
best_model.eval()

In [205]:
trainer.validate(best_model, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
C:\Data\University\master\master_2_year_sem_1\data-security-in-machine-learning\.venv\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Validation DataLoader 0: 100%|██████████| 163/163 [00:13<00:00, 11.86it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         val_acc            0.9923076629638672
        val_loss           0.027110787108540535
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 0.027110787108540535, 'val_acc': 0.9923076629638672}]

# FGSM Attack

In [4]:
def fgsm_attack(image, epsilon, grad):
    perturbed_image = image + epsilon * grad.sign()
    return torch.clamp(perturbed_image, -2.5, 2.5)

In [5]:
class Accuracy(Metric):
    def __init__(self):
        super().__init__()
        self.add_state("correct", default=torch.tensor(0), dist_reduce_fx="sum")
        self.add_state("total", default=torch.tensor(0), dist_reduce_fx="sum")

    @torch.no_grad()
    def update(self, preds, target):
        preds = torch.argmax(preds, dim=1)
        self.correct += torch.sum(preds == target).cpu()
        self.total += target.size(0)

    def compute(self):
        return self.correct.float() / self.total.float()

    def reset(self):
        self.correct = torch.tensor(0)
        self.total = torch.tensor(0)

In [10]:
fgsm_data_module = ImageNet10DataModule()
fgsm_data_module.setup()
fgsm_dataloader = fgsm_data_module.val_dataloader()

In [8]:
def launch_fgsm_attack(model_to_attack, eps_range, data_loader):
    accuracy = Accuracy()
    model_to_attack.eval()
    
    for eps in eps_range:
        accuracy.reset()
        total_loss = 0
        
        with torch.amp.autocast("cuda"):
            for data, target in tqdm(data_loader):
                data, target = data.to("cuda"), target.to("cuda")
                data.requires_grad = True
                
                output = model_to_attack(data)
                loss = nn.functional.cross_entropy(output, target)
                model_to_attack.zero_grad()
                loss.backward()
                
                data_grad = data.grad.data
                perturbed_data = fgsm_attack(data, eps, data_grad)
        
                with torch.no_grad():
                    output = model_to_attack(perturbed_data)
                    total_loss += nn.functional.cross_entropy(output, target).item()
                    accuracy.update(output, target)
                
            print(f"Accuracy for epsilon {eps:.2f}: {accuracy.compute()}, loss: {total_loss/len(data_loader)}")

In [227]:
launch_fgsm_attack(best_model, torch.arange(0.0, 0.5, 0.05), fgsm_dataloader)

100%|██████████| 163/163 [06:29<00:00,  2.39s/it]


Accuracy for epsilon 0.00: 0.9896153807640076, 0.03474518697868827


100%|██████████| 163/163 [06:12<00:00,  2.28s/it]


Accuracy for epsilon 0.05: 0.885769248008728, 0.9380737367126107


100%|██████████| 163/163 [06:08<00:00,  2.26s/it]


Accuracy for epsilon 0.10: 0.8853846192359924, 0.8899012136221663


100%|██████████| 163/163 [06:10<00:00,  2.27s/it]


Accuracy for epsilon 0.15: 0.885769248008728, 0.8262286912535597


100%|██████████| 163/163 [06:17<00:00,  2.31s/it]


Accuracy for epsilon 0.20: 0.8876923322677612, 0.7642550312119759


100%|██████████| 163/163 [06:12<00:00,  2.29s/it]


Accuracy for epsilon 0.25: 0.8873077034950256, 0.71040010776805


100%|██████████| 163/163 [06:10<00:00,  2.27s/it]


Accuracy for epsilon 0.30: 0.8846153616905212, 0.6685156404880658


100%|██████████| 163/163 [06:15<00:00,  2.30s/it]


Accuracy for epsilon 0.35: 0.8834615349769592, 0.6365417743021725


100%|██████████| 163/163 [05:43<00:00,  2.11s/it]


Accuracy for epsilon 0.40: 0.8819230794906616, 0.6123185606181987


100%|██████████| 163/163 [05:45<00:00,  2.12s/it]

Accuracy for epsilon 0.45: 0.8826923370361328, 0.5935469628568807





In [11]:
launch_fgsm_attack(best_model, torch.arange(0.1, 0.3, 0.1), fgsm_dataloader)

100%|██████████| 163/163 [02:11<00:00,  1.24it/s]


Accuracy for epsilon 0.10: 0.8876923322677612, 0.8727266201578034


100%|██████████| 163/163 [05:13<00:00,  1.92s/it]

Accuracy for epsilon 0.20: 0.8888461589813232, 0.7428377648255576



