In [1]:
from models.CIFAR10_Models import *
import torch
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import utils
from art import config
from art.utils import load_dataset, get_file
from art.estimators.classification import PyTorchClassifier
from art.attacks.evasion import FastGradientMethod, BasicIterativeMethod, ProjectedGradientDescent, DeepFool
from art.defences.trainer import AdversarialTrainer
from copy import deepcopy

  _torch_pytree._register_pytree_node(


In [None]:
# Load the data
#
# Configuration for this notebook. Two batch sizes are kept on purpose:
# the loaders below are only used to materialise the datasets as numpy
# arrays, while BATCH_SIZE is what later cells hand to the attacks and to
# adversarial training.
LOADER_BATCH_SIZE = 256
BATCH_SIZE = 512
EPS = 0.2
VAL_SIZE = 10000  # number of training images held out for validation

# Seed every RNG involved so the split, the shuffling and the random
# augmentations are repeatable after Restart & Run All.
torch.manual_seed(42)
np.random.seed(42)
generator = torch.Generator().manual_seed(42)

# Define the transformation
# NOTE(review): 0.1307 / 0.3081 are the statistics usually quoted for MNIST,
# and a single value is broadcast over all three CIFAR-10 channels. They are
# left unchanged because the saved clean models were presumably trained with
# them -- confirm before switching to per-channel CIFAR-10 statistics.
train_transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Load the CIFAR10 dataset
full_train_dataset = datasets.CIFAR10(root='./data', train=True, transform=train_transform, download=True)
test_dataset = datasets.CIFAR10(root='./data', train=False, transform=test_transform, download=True)

# Split the training dataset into training and validation datasets.
# The split sizes are derived from the dataset length: random_split raises a
# ValueError when the requested lengths do not sum to len(dataset), which is
# what the previous hard-coded [50000, 10000] (60000 items) did.
# NOTE(review): the validation subset shares train_transform, so it is
# augmented as well -- confirm that this is intended.
train_size = len(full_train_dataset) - VAL_SIZE
train_dataset, val_dataset = random_split(
    full_train_dataset, [train_size, VAL_SIZE], generator=generator
)

# Create the dataloaders
train_loader = DataLoader(train_dataset, batch_size=LOADER_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=LOADER_BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=LOADER_BATCH_SIZE, shuffle=False)

# Materialise each split as numpy arrays for the ART estimators used below.
X_train, y_train = utils.loader_to_numpy(train_loader)
X_val, y_val = utils.loader_to_numpy(val_loader)
X_test, y_test = utils.loader_to_numpy(test_loader)

In [None]:
def harden_model(clean_model, clean_classifier, BATCH_SIZE, EPS, adv_model_path, attack_obj, plot_title):
    """Adversarially train a copy of ``clean_model`` and compare it with the clean classifier.

    If a previously hardened model can be loaded from ``adv_model_path`` it is
    reused; otherwise a deep copy of ``clean_model`` is adversarially trained
    on the notebook-level ``X_train`` / ``y_train`` arrays and saved to that
    path. The hardened classifier is then compared against
    ``clean_classifier`` on ``X_test`` / ``y_test`` and its clean and
    adversarial accuracies are printed.

    Args:
        clean_model: Trained ``torch.nn.Module``; it is deep-copied, never modified.
        clean_classifier: ART classifier wrapping ``clean_model``, used as the baseline.
        BATCH_SIZE: Batch size for the attack, adversarial training and comparison.
        EPS: Perturbation budget passed as ``eps`` to every attack except DeepFool.
        adv_model_path: File the hardened model is loaded from / saved to.
        attack_obj: ART attack *class* (not an instance), e.g. ``ProjectedGradientDescent``.
        plot_title: Title forwarded to ``utils.compare_classifiers``.
    """
    import os

    def _wrap(model):
        # One place for the estimator settings so the "loaded" and "freshly
        # trained" paths cannot drift apart.
        # NOTE(review): X_train / X_test come from loaders whose transform
        # already applies Normalize with these same constants, so this
        # `preprocessing` may normalise the data a second time -- confirm
        # against how the clean models were trained.
        return PyTorchClassifier(
            model=model,
            loss=torch.nn.CrossEntropyLoss(),
            optimizer=torch.optim.Adam(model.parameters(), lr=0.01),
            input_shape=(3, 32, 32),  # CIFAR-10 images: 3 channels, 32x32 pixels
            nb_classes=10,
            preprocessing=(0.1307, 0.3081),
            device_type='gpu'
        )

    def _make_attack(classifier):
        # `attack_obj` is a class, so the DeepFool check must be a class
        # comparison; isinstance() on a class is always False and would pass
        # `eps` to DeepFool, which does not accept it.
        if isinstance(attack_obj, type) and issubclass(attack_obj, DeepFool):
            return attack_obj(classifier, batch_size=BATCH_SIZE)
        return attack_obj(classifier, batch_size=BATCH_SIZE, eps=EPS)

    # Best-effort reuse of an earlier run. Only the load itself is guarded so
    # that genuine bugs elsewhere are not silently turned into a retrain.
    hardened_model = None
    if os.path.isfile(adv_model_path):
        try:
            # torch.load unpickles arbitrary objects: only point this at
            # files written by this notebook.
            candidate = torch.load(adv_model_path)
        except Exception as exc:
            print(f'Could not load hardened model from {adv_model_path} ({exc!r}); retraining.')
        else:
            if isinstance(candidate, torch.nn.Module):
                hardened_model = candidate
            else:
                print(f'{adv_model_path} does not contain a full model; retraining.')

    needs_training = hardened_model is None
    if needs_training:
        hardened_model = deepcopy(clean_model)

    hardened_classifier = _wrap(hardened_model)
    attack = _make_attack(hardened_classifier)

    if needs_training:
        adv_trainer = AdversarialTrainer(hardened_classifier, attacks=attack, ratio=0.5)
        adv_trainer.fit(X_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)
        # Make sure the target directory exists before saving the checkpoint.
        save_dir = os.path.dirname(adv_model_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
        torch.save(adv_trainer.classifier.model, adv_model_path)
        hardened_classifier = adv_trainer.classifier

    eps_values = [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
    utils.compare_classifiers(clean_classifier, hardened_classifier, X_test, y_test, eps_values, batch_size=BATCH_SIZE, title=plot_title)

    print('Hardened model accuracy on clean test data: ', np.sum(np.argmax(hardened_classifier.predict(X_test), axis=1) == y_test) / len(y_test))
    # Label the result with the attack actually used instead of always "PGD".
    attack_name = getattr(attack_obj, '__name__', type(attack).__name__)
    print(f'Hardened model accuracy on {attack_name} adversarial test data: ', utils.evaluate_attack(attack, hardened_classifier, X_test, y_test))

In [None]:
def run_adversarial_process(clean_model, BATCH_SIZE, EPS, adv_model_path, plot_title):
    """Attack a clean model with four evasion attacks, then harden it against each.

    The function wraps ``clean_model`` in an ART classifier, prints its
    accuracy on the notebook-level ``X_test`` / ``y_test`` arrays, evaluates
    and plots FGSM, BIM, PGD and DeepFool against it, and finally calls
    ``harden_model`` once per attack (BIM, PGD, FGSM, DeepFool).

    Args:
        clean_model: Trained ``torch.nn.Module`` to attack and harden.
        BATCH_SIZE: Batch size used by every attack and by hardening.
        EPS: Perturbation budget for the eps-bounded attacks (not used by DeepFool).
        adv_model_path: Base checkpoint path. Each hardening run stores its
            model next to it with the attack name inserted before the
            extension (``x.pth`` -> ``x_BIM.pth``, ``x_PGD.pth``, ...), so a
            model hardened with one attack is never reloaded and reported
            under another attack's title.
        plot_title: Base plot title; ``' Hardened with <attack>'`` is appended per run.
    """
    import os

    # NOTE(review): X_test comes from a loader whose transform already applies
    # Normalize with these same constants, so `preprocessing` may normalise a
    # second time -- confirm against how the clean models were trained.
    clean_classifier = PyTorchClassifier(
        model=clean_model,
        loss=torch.nn.CrossEntropyLoss(),
        optimizer=torch.optim.Adam(clean_model.parameters(), lr=0.01),
        input_shape=(3, 32, 32),  # CIFAR-10 images: 3 channels, 32x32 pixels
        nb_classes=10,
        preprocessing=(0.1307, 0.3081),
        device_type='gpu'
    )

    # Get the clean accuracy
    clean_acc = np.sum(np.argmax(clean_classifier.predict(X_test), axis=1) == y_test) / len(y_test)
    print(f'Clean accuracy: {clean_acc}')

    # Evaluate each evasion attack against the clean classifier, in the order
    # FGSM, BIM, PGD, DeepFool. DeepFool takes no `eps` argument.
    evaluation_specs = (
        (FastGradientMethod, {'eps': EPS}),
        (BasicIterativeMethod, {'eps': EPS}),
        (ProjectedGradientDescent, {'eps': EPS}),
        (DeepFool, {}),
    )
    for attack_cls, attack_kwargs in evaluation_specs:
        attack = attack_cls(clean_classifier, batch_size=BATCH_SIZE, **attack_kwargs)
        print('Accuracy on adversarial test data: ', utils.evaluate_attack(attack, clean_classifier, X_test, y_test))
        utils.plot_images(X_test, y_test, clean_classifier, attack, n=5)

    # Harden once per attack. Every run gets its own checkpoint file; sharing
    # a single path made every run after the first reload the first run's model.
    path_root, path_ext = os.path.splitext(adv_model_path)
    hardening_specs = (
        ('BIM', BasicIterativeMethod),
        ('PGD', ProjectedGradientDescent),
        ('FGSM', FastGradientMethod),
        ('DeepFool', DeepFool),
    )
    for label, attack_cls in hardening_specs:
        harden_model(
            clean_model,
            clean_classifier,
            BATCH_SIZE,
            EPS,
            f'{path_root}_{label}{path_ext}',
            attack_cls,
            plot_title + ' Hardened with ' + label,
        )

In [None]:
# Restore the clean fully-connected network and switch it to inference mode.
clean_model = CIFAR10_FC500_100_10()
clean_state = torch.load('models/clean_state/CIFAR10_FC_500_100_10.pth')
clean_model.load_state_dict(clean_state)
clean_model.eval()

# Attack the clean model, then adversarially harden it.
run_adversarial_process(
    clean_model,
    BATCH_SIZE,
    EPS,
    adv_model_path='models/hardened_state/CIFAR10_FC_500_100_10.pth',
    plot_title='CIFAR10_FC_500_100_10 Adversarial Accuracy',
)

In [None]:
# Restore the clean VGG network and switch it to inference mode.
clean_model = CIFAR10_VGG()
clean_state = torch.load('models/clean_state/CIFAR10_VGG.pth')
clean_model.load_state_dict(clean_state)
clean_model.eval()

# Attack the clean model, then adversarially harden it.
run_adversarial_process(
    clean_model,
    BATCH_SIZE,
    EPS,
    adv_model_path='models/hardened_state/CIFAR10_VGG.pth',
    plot_title='CIFAR10_VGG Adversarial Accuracy',
)

In [None]:
# Restore the clean LeNet network and switch it to inference mode.
clean_model = CIFAR10_LeNet()
clean_state = torch.load('models/clean_state/CIFAR10_LeNet.pth')
clean_model.load_state_dict(clean_state)
clean_model.eval()

# Attack the clean model, then adversarially harden it.
run_adversarial_process(
    clean_model,
    BATCH_SIZE,
    EPS,
    adv_model_path='models/hardened_state/CIFAR10_LeNet.pth',
    plot_title='CIFAR10_LeNet Adversarial Accuracy',
)

In [None]:
# Restore the clean GoogLeNet network and switch it to inference mode.
clean_model = CIFAR10_GoogLeNet()
clean_state = torch.load('models/clean_state/CIFAR10_GoogLeNet.pth')
clean_model.load_state_dict(clean_state)
clean_model.eval()

# Attack the clean model, then adversarially harden it.
run_adversarial_process(
    clean_model,
    BATCH_SIZE,
    EPS,
    adv_model_path='models/hardened_state/CIFAR10_GoogLeNet.pth',
    plot_title='CIFAR10_GoogLeNet Adversarial Accuracy',
)

In [None]:
# Restore the clean ResNet network and switch it to inference mode.
clean_model = CIFAR10_ResNet()
clean_state = torch.load('models/clean_state/CIFAR10_ResNet.pth')
clean_model.load_state_dict(clean_state)
clean_model.eval()

# Attack the clean model, then adversarially harden it.
run_adversarial_process(
    clean_model,
    BATCH_SIZE,
    EPS,
    adv_model_path='models/hardened_state/CIFAR10_ResNet.pth',
    plot_title='CIFAR10_ResNet Adversarial Accuracy',
)